1 1.1.1.11 mrg /* Copyright (C) 2013-2024 Free Software Foundation, Inc.
2 1.1 mrg    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 1.1 mrg 
4 1.1 mrg    This file is part of the GNU Offloading and Multi Processing Library
5 1.1 mrg    (libgomp).
6 1.1 mrg 
7 1.1 mrg    Libgomp is free software; you can redistribute it and/or modify it
8 1.1 mrg    under the terms of the GNU General Public License as published by
9 1.1 mrg    the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg    any later version.
11 1.1 mrg 
12 1.1 mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1 mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15 1.1 mrg    more details.
16 1.1 mrg 
17 1.1 mrg    Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg    permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg    3.1, as published by the Free Software Foundation.
20 1.1 mrg 
21 1.1 mrg    You should have received a copy of the GNU General Public License and
22 1.1 mrg    a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 1.1 mrg    <http://www.gnu.org/licenses/>.  */
25 1.1 mrg 
26 1.1 mrg /* This file contains the support for offloading.  */
27 1.1 mrg 
28 1.1 mrg #include "libgomp.h"
29 1.1 mrg #include "oacc-plugin.h"
30 1.1 mrg #include "oacc-int.h"
31 1.1 mrg #include "gomp-constants.h"
32 1.1 mrg #include <limits.h>
33 1.1 mrg #include <stdbool.h>
34 1.1 mrg #include <stdlib.h>
35 1.1 mrg #ifdef HAVE_INTTYPES_H
36 1.1 mrg # include <inttypes.h>  /* For PRIu64.  */
37 1.1 mrg #endif
38 1.1 mrg #include <string.h>
39 1.1.1.11 mrg #include <stdio.h>  /* For snprintf.  */
40 1.1 mrg #include <assert.h>
41 1.1.1.2 mrg #include <errno.h>
42 1.1 mrg 
43 1.1 mrg #ifdef PLUGIN_SUPPORT
44 1.1 mrg #include <dlfcn.h>
45 1.1 mrg #include "plugin-suffix.h"
46 1.1 mrg #endif
47 1.1 mrg 
48 1.1.1.11 mrg /* Define another splay tree instantiation - for reverse offload.  */
49 1.1.1.11 mrg #define splay_tree_prefix reverse
50 1.1.1.11 mrg #define splay_tree_static
51 1.1.1.11 mrg #define splay_tree_c
52 1.1.1.11 mrg #include "splay-tree.h"
53 1.1.1.11 mrg 
54 1.1.1.11 mrg 
55 1.1.1.10 mrg typedef uintptr_t *hash_entry_type;
56 1.1.1.10 mrg static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
57 1.1.1.10 mrg static inline void htab_free (void *ptr) { free (ptr); }
58 1.1.1.10 mrg #include "hashtab.h"
59 1.1.1.10 mrg 
60 1.1.1.11 mrg ialias_redirect (GOMP_task)
61 1.1.1.11 mrg 
62 1.1.1.10 mrg static inline hashval_t
63 1.1.1.10 mrg htab_hash (hash_entry_type element)
64 1.1.1.10 mrg {
65 1.1.1.10 mrg   return hash_pointer ((void *) element);
66 1.1.1.10 mrg }
67 1.1.1.10 mrg 
68 1.1.1.10 mrg static inline bool
69 1.1.1.10 mrg htab_eq (hash_entry_type x, hash_entry_type y)
70 1.1.1.10 mrg {
71 1.1.1.10 mrg   return x == y;
72 1.1.1.10 mrg }
73 1.1.1.10 mrg 
74 1.1.1.8 mrg #define FIELD_TGT_EMPTY (~(size_t) 0)
75 1.1.1.8 mrg 
76 1.1 mrg static void gomp_target_init (void);
77 1.1 mrg 
78 1.1 mrg /* The whole initialization code for offloading plugins is only run once.  */
79 1.1 mrg static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
80 1.1 mrg 
81 1.1 mrg /* Mutex for offload image registration.  */
82 1.1 mrg static gomp_mutex_t register_lock;
83 1.1 mrg 
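/* The pointer-keyed hash table machinery above is used as a "seen" set by
   the refcount code further below (gomp_increment_refcount and
   gomp_decrement_refcount).  What follows is a minimal sketch, kept out of
   the build via #if 0, of that usage pattern; 'seen_set_sketch' and
   'my_keys' are hypothetical names, and htab_create's shape is assumed from
   its uses elsewhere in this file.  Each distinct uintptr_t * can be
   inserted at most once, which is how OpenMP 5.0's "adjust a refcount at
   most once per construct" rule is enforced.  */
#if 0
static void
seen_set_sketch (uintptr_t **my_keys, size_t n)
{
  htab_t seen = htab_create (n);
  for (size_t i = 0; i < n; i++)
    {
      if (htab_find (seen, my_keys[i]))
	continue;  /* Already seen: skip the refcount adjustment.  */
      uintptr_t **slot = htab_find_slot (&seen, my_keys[i], INSERT);
      *slot = my_keys[i];
      /* First encounter: adjust the refcount here.  */
    }
  htab_free (seen);
}
#endif

84 1.1 mrg /* This structure describes an offload image.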
85 1.1 mrg    It contains the type of the target device, a pointer to the host table
86 1.1 mrg    descriptor, and a pointer to the target data.  */
87 1.1 mrg struct offload_image_descr {
88 1.1.1.2 mrg   unsigned version;
89 1.1 mrg   enum offload_target_type type;
90 1.1.1.2 mrg   const void *host_table;
91 1.1.1.2 mrg   const void *target_data;
92 1.1 mrg };
93 1.1 mrg 
94 1.1 mrg /* Array of descriptors of offload images.  */
95 1.1 mrg static struct offload_image_descr *offload_images;
96 1.1 mrg 
97 1.1 mrg /* Total number of offload images.  */
98 1.1 mrg static int num_offload_images;
99 1.1 mrg 
100 1.1 mrg /* Array of descriptors for all available devices.  */
101 1.1 mrg static struct gomp_device_descr *devices;
102 1.1 mrg 
103 1.1 mrg /* Total number of available devices.  */
104 1.1 mrg static int num_devices;
105 1.1 mrg 
106 1.1 mrg /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
107 1.1 mrg static int num_devices_openmp;
108 1.1 mrg 
109 1.1.1.11 mrg /* Mask of the OpenMP 'requires' directive clauses in use.  */
110 1.1.1.11 mrg static int omp_requires_mask;
111 1.1.1.11 mrg 
112 1.1 mrg /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
113 1.1 mrg 
114 1.1 mrg static void *
115 1.1 mrg gomp_realloc_unlock (void *old, size_t size)
116 1.1 mrg {
117 1.1 mrg   void *ret = realloc (old, size);
118 1.1 mrg   if (ret == NULL)
119 1.1 mrg     {
120 1.1 mrg       gomp_mutex_unlock (&register_lock);
121 1.1 mrg       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
122 1.1 mrg     }
123 1.1 mrg   return ret;
124 1.1 mrg }
125 1.1 mrg 
126 1.1 mrg attribute_hidden void
127 1.1 mrg gomp_init_targets_once (void)
128 1.1 mrg {
129 1.1 mrg   (void) pthread_once (&gomp_is_initialized, gomp_target_init);
130 1.1 mrg }
131 1.1 mrg 
132 1.1 mrg attribute_hidden int
133 1.1 mrg gomp_get_num_devices (void)
134 1.1 mrg {
135 1.1 mrg   gomp_init_targets_once ();
136 1.1 mrg   return num_devices_openmp;
137 1.1 mrg }
138 1.1 mrg 
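/* Device numbers reach resolve_device below through two routes: "remapped"
   compiler-generated GOMP_* constants, and user-facing omp_* values.  A
   minimal sketch, kept out of the build via #if 0, of the resolution rules
   that follow; 'resolve_device_sketch' is a hypothetical name and each
   comment states the outcome of the corresponding call.  */
#if 0
static void
resolve_device_sketch (void)
{
  /* Compiler-generated (remapped) ids:  */
  resolve_device (GOMP_DEVICE_ICV, true);	/* Re-resolves via the
						   default-device-var ICV.  */
  resolve_device (GOMP_DEVICE_HOST_FALLBACK, true);  /* NULL: host fallback.  */
  /* User-facing ids, e.g. from omp_target_alloc:  */
  resolve_device (omp_initial_device, false);	/* NULL: the host device.  */
  resolve_device (omp_invalid_device, false);	/* Always gomp_fatal.  */
}
#endif

139 1.1 mrg static struct gomp_device_descr *
140 1.1.1.11 mrg resolve_device (int device_id, bool remapped)
141 1.1 mrg {
142 1.1.1.11 mrg   /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
143 1.1.1.11 mrg      called, which must be done before using default_device_var.  */
144 1.1.1.11 mrg   int num_devices = gomp_get_num_devices ();
145 1.1.1.11 mrg 
146 1.1.1.11 mrg   if (remapped && device_id == GOMP_DEVICE_ICV)
147 1.1 mrg     {
148 1.1 mrg       struct gomp_task_icv *icv = gomp_icv (false);
149 1.1 mrg       device_id = icv->default_device_var;
150 1.1.1.11 mrg       remapped = false;
151 1.1 mrg     }
152 1.1 mrg 
153 1.1.1.11 mrg   if (device_id < 0)
154 1.1.1.11 mrg     {
155 1.1.1.11 mrg       if (device_id == (remapped ?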
GOMP_DEVICE_HOST_FALLBACK 156 1.1.1.11 mrg : omp_initial_device)) 157 1.1.1.11 mrg return NULL; 158 1.1.1.11 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY 159 1.1.1.11 mrg && num_devices == 0) 160 1.1.1.11 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " 161 1.1.1.11 mrg "but only the host device is available"); 162 1.1.1.11 mrg else if (device_id == omp_invalid_device) 163 1.1.1.11 mrg gomp_fatal ("omp_invalid_device encountered"); 164 1.1.1.11 mrg else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) 165 1.1.1.11 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " 166 1.1.1.11 mrg "but device not found"); 167 1.1.1.11 mrg 168 1.1.1.11 mrg return NULL; 169 1.1.1.11 mrg } 170 1.1.1.11 mrg else if (device_id >= num_devices) 171 1.1.1.10 mrg { 172 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY 173 1.1.1.11 mrg && device_id != num_devices) 174 1.1.1.10 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " 175 1.1.1.10 mrg "but device not found"); 176 1.1.1.10 mrg 177 1.1.1.10 mrg return NULL; 178 1.1.1.10 mrg } 179 1.1 mrg 180 1.1.1.2 mrg gomp_mutex_lock (&devices[device_id].lock); 181 1.1.1.2 mrg if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) 182 1.1.1.2 mrg gomp_init_device (&devices[device_id]); 183 1.1.1.2 mrg else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) 184 1.1.1.2 mrg { 185 1.1.1.2 mrg gomp_mutex_unlock (&devices[device_id].lock); 186 1.1.1.10 mrg 187 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) 188 1.1.1.10 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " 189 1.1.1.10 mrg "but device is finalized"); 190 1.1.1.10 mrg 191 1.1.1.2 mrg return NULL; 192 1.1.1.2 mrg } 193 1.1.1.2 mrg gomp_mutex_unlock (&devices[device_id].lock); 194 1.1.1.2 mrg 195 1.1 mrg return &devices[device_id]; 196 1.1 mrg } 197 1.1 mrg 198 1.1 mrg 199 1.1.1.2 mrg static inline splay_tree_key 200 1.1.1.2 mrg gomp_map_lookup (splay_tree mem_map, splay_tree_key key) 201 1.1.1.2 mrg { 202 1.1.1.2 mrg if (key->host_start != key->host_end) 203 1.1.1.2 mrg return splay_tree_lookup (mem_map, key); 204 1.1.1.2 mrg 205 1.1.1.2 mrg key->host_end++; 206 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, key); 207 1.1.1.2 mrg key->host_end--; 208 1.1.1.2 mrg if (n) 209 1.1.1.2 mrg return n; 210 1.1.1.2 mrg key->host_start--; 211 1.1.1.2 mrg n = splay_tree_lookup (mem_map, key); 212 1.1.1.2 mrg key->host_start++; 213 1.1.1.2 mrg if (n) 214 1.1.1.2 mrg return n; 215 1.1.1.2 mrg return splay_tree_lookup (mem_map, key); 216 1.1.1.2 mrg } 217 1.1.1.2 mrg 218 1.1.1.11 mrg static inline reverse_splay_tree_key 219 1.1.1.11 mrg gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key) 220 1.1.1.11 mrg { 221 1.1.1.11 mrg return reverse_splay_tree_lookup (mem_map_rev, key); 222 1.1.1.11 mrg } 223 1.1.1.11 mrg 224 1.1.1.2 mrg static inline splay_tree_key 225 1.1.1.2 mrg gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) 226 1.1.1.2 mrg { 227 1.1.1.2 mrg if (key->host_start != key->host_end) 228 1.1.1.2 mrg return splay_tree_lookup (mem_map, key); 229 1.1.1.2 mrg 230 1.1.1.2 mrg key->host_end++; 231 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, key); 232 1.1.1.2 mrg key->host_end--; 233 1.1.1.2 mrg return n; 234 1.1.1.2 mrg } 235 1.1.1.2 mrg 236 1.1.1.3 mrg static inline void 237 1.1.1.3 mrg gomp_device_copy (struct gomp_device_descr *devicep, 238 1.1.1.3 mrg bool (*copy_func) (int, void *, const void *, size_t), 239 1.1.1.3 mrg const char 
*dst, void *dstaddr, 240 1.1.1.3 mrg const char *src, const void *srcaddr, 241 1.1.1.3 mrg size_t size) 242 1.1.1.3 mrg { 243 1.1.1.3 mrg if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) 244 1.1.1.3 mrg { 245 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock); 246 1.1.1.3 mrg gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", 247 1.1.1.3 mrg src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); 248 1.1.1.3 mrg } 249 1.1.1.3 mrg } 250 1.1.1.3 mrg 251 1.1.1.8 mrg static inline void 252 1.1.1.8 mrg goacc_device_copy_async (struct gomp_device_descr *devicep, 253 1.1.1.8 mrg bool (*copy_func) (int, void *, const void *, size_t, 254 1.1.1.8 mrg struct goacc_asyncqueue *), 255 1.1.1.8 mrg const char *dst, void *dstaddr, 256 1.1.1.8 mrg const char *src, const void *srcaddr, 257 1.1.1.10 mrg const void *srcaddr_orig, 258 1.1.1.8 mrg size_t size, struct goacc_asyncqueue *aq) 259 1.1.1.8 mrg { 260 1.1.1.8 mrg if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) 261 1.1.1.8 mrg { 262 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 263 1.1.1.10 mrg if (srcaddr_orig && srcaddr_orig != srcaddr) 264 1.1.1.10 mrg gomp_fatal ("Copying of %s object [%p..%p)" 265 1.1.1.10 mrg " via buffer %s object [%p..%p)" 266 1.1.1.10 mrg " to %s object [%p..%p) failed", 267 1.1.1.10 mrg src, srcaddr_orig, srcaddr_orig + size, 268 1.1.1.10 mrg src, srcaddr, srcaddr + size, 269 1.1.1.10 mrg dst, dstaddr, dstaddr + size); 270 1.1.1.10 mrg else 271 1.1.1.10 mrg gomp_fatal ("Copying of %s object [%p..%p)" 272 1.1.1.10 mrg " to %s object [%p..%p) failed", 273 1.1.1.10 mrg src, srcaddr, srcaddr + size, 274 1.1.1.10 mrg dst, dstaddr, dstaddr + size); 275 1.1.1.8 mrg } 276 1.1.1.8 mrg } 277 1.1.1.8 mrg 278 1.1.1.6 mrg /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) 279 1.1.1.6 mrg host to device memory transfers. */ 280 1.1.1.6 mrg 281 1.1.1.7 mrg struct gomp_coalesce_chunk 282 1.1.1.7 mrg { 283 1.1.1.7 mrg /* The starting and ending point of a coalesced chunk of memory. */ 284 1.1.1.7 mrg size_t start, end; 285 1.1.1.7 mrg }; 286 1.1.1.7 mrg 287 1.1.1.6 mrg struct gomp_coalesce_buf 288 1.1.1.6 mrg { 289 1.1.1.6 mrg /* Buffer into which gomp_copy_host2dev will memcpy data and from which 290 1.1.1.6 mrg it will be copied to the device. */ 291 1.1.1.6 mrg void *buf; 292 1.1.1.6 mrg struct target_mem_desc *tgt; 293 1.1.1.7 mrg /* Array with offsets, chunks[i].start is the starting offset and 294 1.1.1.7 mrg chunks[i].end ending offset relative to tgt->tgt_start device address 295 1.1.1.6 mrg of chunks which are to be copied to buf and later copied to device. */ 296 1.1.1.7 mrg struct gomp_coalesce_chunk *chunks; 297 1.1.1.6 mrg /* Number of chunks in chunks array, or -1 if coalesce buffering should not 298 1.1.1.6 mrg be performed. */ 299 1.1.1.6 mrg long chunk_cnt; 300 1.1.1.6 mrg /* During construction of chunks array, how many memory regions are within 301 1.1.1.6 mrg the last chunk. If there is just one memory region for a chunk, we copy 302 1.1.1.6 mrg it directly to device rather than going through buf. */ 303 1.1.1.6 mrg long use_cnt; 304 1.1.1.6 mrg }; 305 1.1.1.6 mrg 306 1.1.1.6 mrg /* Maximum size of memory region considered for coalescing. Larger copies 307 1.1.1.6 mrg are performed directly. */ 308 1.1.1.6 mrg #define MAX_COALESCE_BUF_SIZE (32 * 1024) 309 1.1.1.6 mrg 310 1.1.1.6 mrg /* Maximum size of a gap in between regions to consider them being copied 311 1.1.1.6 mrg within the same chunk. 
   All the device offsets considered are within
312 1.1.1.6 mrg    newly allocated device memory, so it isn't fatal if we copy some padding
313 1.1.1.6 mrg    in between from host to device.  The gaps come either from alignment
314 1.1.1.6 mrg    padding or from memory regions which are not supposed to be copied from
315 1.1.1.6 mrg    host to device (e.g. map(alloc:), map(from:) etc.).  */
316 1.1.1.6 mrg #define MAX_COALESCE_BUF_GAP (4 * 1024)
317 1.1.1.6 mrg 
318 1.1.1.10 mrg /* Add region with device tgt_start relative offset and length to CBUF.
319 1.1.1.10 mrg 
320 1.1.1.10 mrg    This must not be used for asynchronous copies, because the host data might
321 1.1.1.10 mrg    not be computed yet (by an earlier asynchronous compute region, for
322 1.1.1.11 mrg    example).  The exception is EPHEMERAL data, which we know is available
323 1.1.1.11 mrg    already "by construction".  */
324 1.1.1.6 mrg 
325 1.1.1.6 mrg static inline void
326 1.1.1.6 mrg gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
327 1.1.1.6 mrg {
328 1.1.1.6 mrg   if (len > MAX_COALESCE_BUF_SIZE || len == 0)
329 1.1.1.6 mrg     return;
330 1.1.1.6 mrg   if (cbuf->chunk_cnt)
331 1.1.1.6 mrg     {
332 1.1.1.6 mrg       if (cbuf->chunk_cnt < 0)
333 1.1.1.6 mrg 	return;
334 1.1.1.7 mrg       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
335 1.1.1.6 mrg 	{
336 1.1.1.6 mrg 	  cbuf->chunk_cnt = -1;
337 1.1.1.6 mrg 	  return;
338 1.1.1.6 mrg 	}
339 1.1.1.7 mrg       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
340 1.1.1.6 mrg 	{
341 1.1.1.7 mrg 	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
342 1.1.1.6 mrg 	  cbuf->use_cnt++;
343 1.1.1.6 mrg 	  return;
344 1.1.1.6 mrg 	}
345 1.1.1.6 mrg       /* If the last chunk is only used by one mapping, discard it,
346 1.1.1.6 mrg 	 as it will be one host to device copy anyway and
347 1.1.1.6 mrg 	 memcpying it around will only waste cycles.  */
348 1.1.1.6 mrg       if (cbuf->use_cnt == 1)
349 1.1.1.6 mrg 	cbuf->chunk_cnt--;
350 1.1.1.6 mrg     }
351 1.1.1.7 mrg   cbuf->chunks[cbuf->chunk_cnt].start = start;
352 1.1.1.7 mrg   cbuf->chunks[cbuf->chunk_cnt].end = start + len;
353 1.1.1.6 mrg   cbuf->chunk_cnt++;
354 1.1.1.6 mrg   cbuf->use_cnt = 1;
355 1.1.1.6 mrg }
356 1.1.1.6 mrg 
357 1.1.1.6 mrg /* Return true for mapping kinds which need to copy data from the
358 1.1.1.6 mrg    host to device for regions that weren't previously mapped.  */
359 1.1.1.6 mrg 
360 1.1.1.6 mrg static inline bool
361 1.1.1.6 mrg gomp_to_device_kind_p (int kind)
362 1.1.1.6 mrg {
363 1.1.1.6 mrg   switch (kind)
364 1.1.1.6 mrg     {
365 1.1.1.6 mrg     case GOMP_MAP_ALLOC:
366 1.1.1.6 mrg     case GOMP_MAP_FROM:
367 1.1.1.6 mrg     case GOMP_MAP_FORCE_ALLOC:
368 1.1.1.8 mrg     case GOMP_MAP_FORCE_FROM:
369 1.1.1.6 mrg     case GOMP_MAP_ALWAYS_FROM:
370 1.1.1.11 mrg     case GOMP_MAP_ALWAYS_PRESENT_FROM:
371 1.1.1.11 mrg     case GOMP_MAP_FORCE_PRESENT:
372 1.1.1.6 mrg       return false;
373 1.1.1.6 mrg     default:
374 1.1.1.6 mrg       return true;
375 1.1.1.6 mrg     }
376 1.1.1.6 mrg }
377 1.1.1.6 mrg 
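/* A short worked example, kept out of the build via #if 0, of the merging
   rules implemented by gomp_coalesce_buf_add above; the offsets used are
   hypothetical.  Regions must arrive in increasing offset order: a gap of up
   to MAX_COALESCE_BUF_GAP is absorbed into the previous chunk, while an
   out-of-order offset poisons chunk_cnt to -1 and disables coalescing.  */
#if 0
static void
coalesce_sketch (void)
{
  struct gomp_coalesce_chunk chunks[4];
  struct gomp_coalesce_buf cbuf = { NULL, NULL, chunks, 0, 0 };
  gomp_coalesce_buf_add (&cbuf, 0, 256);	/* Chunk 0: [0, 256).  */
  gomp_coalesce_buf_add (&cbuf, 512, 128);	/* Gap of 256 <= 4 KiB: chunk 0
						   grows to [0, 640).  */
  gomp_coalesce_buf_add (&cbuf, 8192, 64);	/* Gap > 4 KiB: new chunk 1,
						   [8192, 8256).  */
  gomp_coalesce_buf_add (&cbuf, 4096, 16);	/* Offset goes backwards:
						   chunk_cnt becomes -1.  */
}
#endif

378 1.1.1.10 mrg /* Copy host memory to an offload device.  In asynchronous mode (if AQ is
379 1.1.1.10 mrg    non-NULL), when the source data is stack or may otherwise be deallocated
380 1.1.1.10 mrg    before the asynchronous copy takes place, EPHEMERAL must be passed as
381 1.1.1.10 mrg    TRUE.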
*/ 382 1.1.1.10 mrg 383 1.1.1.8 mrg attribute_hidden void 384 1.1.1.3 mrg gomp_copy_host2dev (struct gomp_device_descr *devicep, 385 1.1.1.8 mrg struct goacc_asyncqueue *aq, 386 1.1.1.6 mrg void *d, const void *h, size_t sz, 387 1.1.1.10 mrg bool ephemeral, struct gomp_coalesce_buf *cbuf) 388 1.1.1.3 mrg { 389 1.1.1.6 mrg if (cbuf) 390 1.1.1.6 mrg { 391 1.1.1.6 mrg uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start; 392 1.1.1.7 mrg if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end) 393 1.1.1.6 mrg { 394 1.1.1.6 mrg long first = 0; 395 1.1.1.6 mrg long last = cbuf->chunk_cnt - 1; 396 1.1.1.6 mrg while (first <= last) 397 1.1.1.6 mrg { 398 1.1.1.6 mrg long middle = (first + last) >> 1; 399 1.1.1.7 mrg if (cbuf->chunks[middle].end <= doff) 400 1.1.1.6 mrg first = middle + 1; 401 1.1.1.7 mrg else if (cbuf->chunks[middle].start <= doff) 402 1.1.1.6 mrg { 403 1.1.1.7 mrg if (doff + sz > cbuf->chunks[middle].end) 404 1.1.1.10 mrg { 405 1.1.1.10 mrg gomp_mutex_unlock (&devicep->lock); 406 1.1.1.10 mrg gomp_fatal ("internal libgomp cbuf error"); 407 1.1.1.10 mrg } 408 1.1.1.11 mrg 409 1.1.1.11 mrg /* In an asynchronous context, verify that CBUF isn't used 410 1.1.1.11 mrg with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */ 411 1.1.1.11 mrg if (__builtin_expect (aq != NULL, 0)) 412 1.1.1.11 mrg assert (ephemeral); 413 1.1.1.11 mrg 414 1.1.1.7 mrg memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start), 415 1.1.1.6 mrg h, sz); 416 1.1.1.6 mrg return; 417 1.1.1.6 mrg } 418 1.1.1.6 mrg else 419 1.1.1.6 mrg last = middle - 1; 420 1.1.1.6 mrg } 421 1.1.1.6 mrg } 422 1.1.1.6 mrg } 423 1.1.1.10 mrg 424 1.1.1.11 mrg if (__builtin_expect (aq != NULL, 0)) 425 1.1.1.11 mrg { 426 1.1.1.11 mrg void *h_buf = (void *) h; 427 1.1.1.11 mrg if (ephemeral) 428 1.1.1.11 mrg { 429 1.1.1.11 mrg /* We're queueing up an asynchronous copy from data that may 430 1.1.1.11 mrg disappear before the transfer takes place (i.e. because it is a 431 1.1.1.11 mrg stack local in a function that is no longer executing). As we've 432 1.1.1.11 mrg not been able to use CBUF, make a copy of the data into a 433 1.1.1.11 mrg temporary buffer. */ 434 1.1.1.11 mrg h_buf = gomp_malloc (sz); 435 1.1.1.11 mrg memcpy (h_buf, h, sz); 436 1.1.1.11 mrg } 437 1.1.1.11 mrg goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, 438 1.1.1.11 mrg "dev", d, "host", h_buf, h, sz, aq); 439 1.1.1.11 mrg if (ephemeral) 440 1.1.1.11 mrg /* Free once the transfer has completed. 
*/
441 1.1.1.11 mrg 	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
442 1.1.1.11 mrg     }
443 1.1.1.11 mrg   else
444 1.1.1.11 mrg     gomp_device_copy (devicep, devicep->host2dev_func,
445 1.1.1.11 mrg 		      "dev", d, "host", h, sz);
446 1.1.1.3 mrg }
447 1.1.1.3 mrg 
448 1.1.1.8 mrg attribute_hidden void
449 1.1.1.3 mrg gomp_copy_dev2host (struct gomp_device_descr *devicep,
450 1.1.1.8 mrg 		    struct goacc_asyncqueue *aq,
451 1.1.1.3 mrg 		    void *h, const void *d, size_t sz)
452 1.1.1.3 mrg {
453 1.1.1.8 mrg   if (__builtin_expect (aq != NULL, 0))
454 1.1.1.8 mrg     goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
455 1.1.1.10 mrg 			     "host", h, "dev", d, NULL, sz, aq);
456 1.1.1.8 mrg   else
457 1.1.1.8 mrg     gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
458 1.1.1.3 mrg }
459 1.1.1.3 mrg 
460 1.1.1.3 mrg static void
461 1.1.1.3 mrg gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
462 1.1.1.3 mrg {
463 1.1.1.3 mrg   if (!devicep->free_func (devicep->target_id, devptr))
464 1.1.1.3 mrg     {
465 1.1.1.3 mrg       gomp_mutex_unlock (&devicep->lock);
466 1.1.1.3 mrg       gomp_fatal ("error in freeing device memory block at %p", devptr);
467 1.1.1.3 mrg     }
468 1.1.1.3 mrg }
469 1.1.1.3 mrg 
470 1.1.1.10 mrg /* Increment reference count of a splay_tree_key region K by 1.
471 1.1.1.10 mrg    If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
472 1.1.1.10 mrg    increment the value if refcount is not yet contained in the set (used for
473 1.1.1.10 mrg    OpenMP 5.0, which specifies that a region's refcount is adjusted at most
474 1.1.1.10 mrg    once for each construct).  */
475 1.1.1.10 mrg 
476 1.1.1.10 mrg static inline void
477 1.1.1.10 mrg gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
478 1.1.1.10 mrg {
479 1.1.1.11 mrg   if (k == NULL
480 1.1.1.11 mrg       || k->refcount == REFCOUNT_INFINITY
481 1.1.1.11 mrg       || k->refcount == REFCOUNT_ACC_MAP_DATA)
482 1.1.1.10 mrg     return;
483 1.1.1.10 mrg 
484 1.1.1.10 mrg   uintptr_t *refcount_ptr = &k->refcount;
485 1.1.1.10 mrg 
486 1.1.1.10 mrg   if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
487 1.1.1.10 mrg     refcount_ptr = &k->structelem_refcount;
488 1.1.1.10 mrg   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
489 1.1.1.10 mrg     refcount_ptr = k->structelem_refcount_ptr;
490 1.1.1.10 mrg 
491 1.1.1.10 mrg   if (refcount_set)
492 1.1.1.10 mrg     {
493 1.1.1.10 mrg       if (htab_find (*refcount_set, refcount_ptr))
494 1.1.1.10 mrg 	return;
495 1.1.1.10 mrg       uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
496 1.1.1.10 mrg       *slot = refcount_ptr;
497 1.1.1.10 mrg     }
498 1.1.1.10 mrg 
499 1.1.1.10 mrg   *refcount_ptr += 1;
500 1.1.1.10 mrg   return;
501 1.1.1.10 mrg }
502 1.1.1.10 mrg 
503 1.1.1.10 mrg /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
504 1.1.1.10 mrg    is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
505 1.1.1.10 mrg    track already seen refcounts, and only adjust the value if refcount is not
506 1.1.1.10 mrg    yet contained in the set (like gomp_increment_refcount).
507 1.1.1.10 mrg 
508 1.1.1.10 mrg    Return out-values: set *DO_COPY to true if we set the refcount to zero, or
509 1.1.1.10 mrg    it is already zero and we know we decremented it earlier.  This signals that
510 1.1.1.10 mrg    associated maps should be copied back to host.
511 1.1.1.10 mrg 
512 1.1.1.10 mrg    *DO_REMOVE is set to true when this is the first handling of this refcount
513 1.1.1.10 mrg    and we are setting it to zero.
This signals a removal of this key from the
514 1.1.1.10 mrg    splay-tree map.
515 1.1.1.10 mrg 
516 1.1.1.10 mrg    Copy and removal are separated due to cases like handling of structure
517 1.1.1.10 mrg    elements, e.g. each map of a structure element representing a possible copy
518 1.1.1.10 mrg    out of a structure field has to be handled individually, but we only signal
519 1.1.1.10 mrg    removal for one (the first encountered) sibling map.  */
520 1.1.1.10 mrg 
521 1.1.1.10 mrg static inline void
522 1.1.1.10 mrg gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
523 1.1.1.10 mrg 			 bool *do_copy, bool *do_remove)
524 1.1.1.10 mrg {
525 1.1.1.11 mrg   if (k == NULL
526 1.1.1.11 mrg       || k->refcount == REFCOUNT_INFINITY
527 1.1.1.11 mrg       || k->refcount == REFCOUNT_ACC_MAP_DATA)
528 1.1.1.10 mrg     {
529 1.1.1.10 mrg       *do_copy = *do_remove = false;
530 1.1.1.10 mrg       return;
531 1.1.1.10 mrg     }
532 1.1.1.10 mrg 
533 1.1.1.10 mrg   uintptr_t *refcount_ptr = &k->refcount;
534 1.1.1.10 mrg 
535 1.1.1.10 mrg   if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
536 1.1.1.10 mrg     refcount_ptr = &k->structelem_refcount;
537 1.1.1.10 mrg   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
538 1.1.1.10 mrg     refcount_ptr = k->structelem_refcount_ptr;
539 1.1.1.10 mrg 
540 1.1.1.10 mrg   bool new_encountered_refcount;
541 1.1.1.10 mrg   bool set_to_zero = false;
542 1.1.1.10 mrg   bool is_zero = false;
543 1.1.1.10 mrg 
544 1.1.1.10 mrg   uintptr_t orig_refcount = *refcount_ptr;
545 1.1.1.10 mrg 
546 1.1.1.10 mrg   if (refcount_set)
547 1.1.1.10 mrg     {
548 1.1.1.10 mrg       if (htab_find (*refcount_set, refcount_ptr))
549 1.1.1.10 mrg 	{
550 1.1.1.10 mrg 	  new_encountered_refcount = false;
551 1.1.1.10 mrg 	  goto end;
552 1.1.1.10 mrg 	}
553 1.1.1.10 mrg 
554 1.1.1.10 mrg       uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
555 1.1.1.10 mrg       *slot = refcount_ptr;
556 1.1.1.10 mrg       new_encountered_refcount = true;
557 1.1.1.10 mrg     }
558 1.1.1.10 mrg   else
559 1.1.1.10 mrg     /* If no refcount_set being used, assume all keys are being decremented
560 1.1.1.10 mrg        for the first time.  */
561 1.1.1.10 mrg     new_encountered_refcount = true;
562 1.1.1.10 mrg 
563 1.1.1.10 mrg   if (delete_p)
564 1.1.1.10 mrg     *refcount_ptr = 0;
565 1.1.1.10 mrg   else if (*refcount_ptr > 0)
566 1.1.1.10 mrg     *refcount_ptr -= 1;
567 1.1.1.10 mrg 
568 1.1.1.10 mrg  end:
569 1.1.1.10 mrg   if (*refcount_ptr == 0)
570 1.1.1.10 mrg     {
571 1.1.1.10 mrg       if (orig_refcount > 0)
572 1.1.1.10 mrg 	set_to_zero = true;
573 1.1.1.10 mrg 
574 1.1.1.10 mrg       is_zero = true;
575 1.1.1.10 mrg     }
576 1.1.1.10 mrg 
577 1.1.1.10 mrg   *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
578 1.1.1.10 mrg   *do_remove = (new_encountered_refcount && set_to_zero);
579 1.1.1.10 mrg }
580 1.1.1.10 mrg 
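/* A minimal sketch, kept out of the build via #if 0, of the out-values
   computed by gomp_decrement_refcount above, assuming a hypothetical
   ordinary key K whose refcount starts at 2 and with no refcount set in
   use.  */
#if 0
static void
decrement_sketch (splay_tree_key k)
{
  bool do_copy, do_remove;
  gomp_decrement_refcount (k, NULL, false, &do_copy, &do_remove);
  /* Refcount 2 -> 1: do_copy == false, do_remove == false.  */
  gomp_decrement_refcount (k, NULL, false, &do_copy, &do_remove);
  /* Refcount 1 -> 0: do_copy == true, do_remove == true.  */
}
#endif

581 1.1.1.2 mrg /* Handle the case where gomp_map_lookup, splay_tree_lookup or
582 1.1.1.2 mrg    gomp_map_0len_lookup found oldn for newn.
583 1.1 mrg    Helper function of gomp_map_vars.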
*/
584 1.1 mrg 
585 1.1 mrg static inline void
586 1.1.1.8 mrg gomp_map_vars_existing (struct gomp_device_descr *devicep,
587 1.1.1.8 mrg 			struct goacc_asyncqueue *aq, splay_tree_key oldn,
588 1.1.1.2 mrg 			splay_tree_key newn, struct target_var_desc *tgt_var,
589 1.1.1.10 mrg 			unsigned char kind, bool always_to_flag, bool implicit,
590 1.1.1.10 mrg 			struct gomp_coalesce_buf *cbuf,
591 1.1.1.10 mrg 			htab_t *refcount_set)
592 1.1 mrg {
593 1.1.1.10 mrg   assert (kind != GOMP_MAP_ATTACH
594 1.1.1.10 mrg 	  && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
595 1.1.1.8 mrg 
596 1.1.1.2 mrg   tgt_var->key = oldn;
597 1.1.1.2 mrg   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
598 1.1.1.2 mrg   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
599 1.1.1.8 mrg   tgt_var->is_attach = false;
600 1.1.1.2 mrg   tgt_var->offset = newn->host_start - oldn->host_start;
601 1.1.1.10 mrg 
602 1.1.1.10 mrg   /* For implicit maps, old contained in new is valid.  */
603 1.1.1.10 mrg   bool implicit_subset = (implicit
604 1.1.1.10 mrg 			  && newn->host_start <= oldn->host_start
605 1.1.1.10 mrg 			  && oldn->host_end <= newn->host_end);
606 1.1.1.10 mrg   if (implicit_subset)
607 1.1.1.10 mrg     tgt_var->length = oldn->host_end - oldn->host_start;
608 1.1.1.10 mrg   else
609 1.1.1.10 mrg     tgt_var->length = newn->host_end - newn->host_start;
610 1.1.1.2 mrg 
611 1.1.1.11 mrg   if (GOMP_MAP_FORCE_P (kind)
612 1.1.1.10 mrg       /* For implicit maps, old contained in new is valid.  */
613 1.1.1.10 mrg       || !(implicit_subset
614 1.1.1.10 mrg 	   /* Otherwise, new contained inside old is considered valid.  */
615 1.1.1.10 mrg 	   || (oldn->host_start <= newn->host_start
616 1.1.1.10 mrg 	       && newn->host_end <= oldn->host_end)))
617 1.1 mrg     {
618 1.1 mrg       gomp_mutex_unlock (&devicep->lock);
619 1.1 mrg       gomp_fatal ("Trying to map into device [%p..%p) object when "
620 1.1 mrg 		  "[%p..%p) is already mapped",
621 1.1 mrg 		  (void *) newn->host_start, (void *) newn->host_end,
622 1.1 mrg 		  (void *) oldn->host_start, (void *) oldn->host_end);
623 1.1 mrg     }
624 1.1.1.2 mrg 
625 1.1.1.10 mrg   if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
626 1.1.1.10 mrg     {
627 1.1.1.10 mrg       /* Implicit + always should not happen.  If it does occur, the
628 1.1.1.10 mrg 	 address/length adjustment below is a TODO.  */
629 1.1.1.10 mrg       assert (!implicit_subset);
630 1.1.1.10 mrg 
631 1.1.1.10 mrg       if (oldn->aux && oldn->aux->attach_count)
632 1.1.1.10 mrg 	{
633 1.1.1.10 mrg 	  /* We have to be careful not to overwrite still attached pointers
634 1.1.1.10 mrg 	     during the copyback to host.
*/ 635 1.1.1.10 mrg uintptr_t addr = newn->host_start; 636 1.1.1.10 mrg while (addr < newn->host_end) 637 1.1.1.10 mrg { 638 1.1.1.10 mrg size_t i = (addr - oldn->host_start) / sizeof (void *); 639 1.1.1.10 mrg if (oldn->aux->attach_count[i] == 0) 640 1.1.1.10 mrg gomp_copy_host2dev (devicep, aq, 641 1.1.1.10 mrg (void *) (oldn->tgt->tgt_start 642 1.1.1.10 mrg + oldn->tgt_offset 643 1.1.1.10 mrg + addr - oldn->host_start), 644 1.1.1.10 mrg (void *) addr, 645 1.1.1.10 mrg sizeof (void *), false, cbuf); 646 1.1.1.10 mrg addr += sizeof (void *); 647 1.1.1.10 mrg } 648 1.1.1.10 mrg } 649 1.1.1.10 mrg else 650 1.1.1.10 mrg gomp_copy_host2dev (devicep, aq, 651 1.1.1.10 mrg (void *) (oldn->tgt->tgt_start + oldn->tgt_offset 652 1.1.1.10 mrg + newn->host_start - oldn->host_start), 653 1.1.1.10 mrg (void *) newn->host_start, 654 1.1.1.10 mrg newn->host_end - newn->host_start, false, cbuf); 655 1.1.1.10 mrg } 656 1.1.1.3 mrg 657 1.1.1.10 mrg gomp_increment_refcount (oldn, refcount_set); 658 1.1 mrg } 659 1.1 mrg 660 1.1 mrg static int 661 1.1.1.2 mrg get_kind (bool short_mapkind, void *kinds, int idx) 662 1.1.1.2 mrg { 663 1.1.1.10 mrg if (!short_mapkind) 664 1.1.1.10 mrg return ((unsigned char *) kinds)[idx]; 665 1.1.1.10 mrg 666 1.1.1.10 mrg int val = ((unsigned short *) kinds)[idx]; 667 1.1.1.10 mrg if (GOMP_MAP_IMPLICIT_P (val)) 668 1.1.1.10 mrg val &= ~GOMP_MAP_IMPLICIT; 669 1.1.1.10 mrg return val; 670 1.1.1.10 mrg } 671 1.1.1.10 mrg 672 1.1.1.10 mrg 673 1.1.1.10 mrg static bool 674 1.1.1.10 mrg get_implicit (bool short_mapkind, void *kinds, int idx) 675 1.1.1.10 mrg { 676 1.1.1.10 mrg if (!short_mapkind) 677 1.1.1.10 mrg return false; 678 1.1.1.10 mrg 679 1.1.1.10 mrg int val = ((unsigned short *) kinds)[idx]; 680 1.1.1.10 mrg return GOMP_MAP_IMPLICIT_P (val); 681 1.1.1.2 mrg } 682 1.1.1.2 mrg 683 1.1.1.2 mrg static void 684 1.1.1.8 mrg gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, 685 1.1.1.8 mrg uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias, 686 1.1.1.10 mrg struct gomp_coalesce_buf *cbuf, 687 1.1.1.10 mrg bool allow_zero_length_array_sections) 688 1.1.1.2 mrg { 689 1.1.1.2 mrg struct gomp_device_descr *devicep = tgt->device_descr; 690 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map; 691 1.1.1.2 mrg struct splay_tree_key_s cur_node; 692 1.1.1.2 mrg 693 1.1.1.2 mrg cur_node.host_start = host_ptr; 694 1.1.1.2 mrg if (cur_node.host_start == (uintptr_t) NULL) 695 1.1.1.2 mrg { 696 1.1.1.2 mrg cur_node.tgt_offset = (uintptr_t) NULL; 697 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, 698 1.1.1.3 mrg (void *) (tgt->tgt_start + target_offset), 699 1.1.1.10 mrg (void *) &cur_node.tgt_offset, sizeof (void *), 700 1.1.1.10 mrg true, cbuf); 701 1.1.1.2 mrg return; 702 1.1.1.2 mrg } 703 1.1.1.2 mrg /* Add bias to the pointer value. 
*/ 704 1.1.1.2 mrg cur_node.host_start += bias; 705 1.1.1.2 mrg cur_node.host_end = cur_node.host_start; 706 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 707 1.1.1.2 mrg if (n == NULL) 708 1.1.1.2 mrg { 709 1.1.1.10 mrg if (allow_zero_length_array_sections) 710 1.1.1.11 mrg cur_node.tgt_offset = cur_node.host_start; 711 1.1.1.10 mrg else 712 1.1.1.10 mrg { 713 1.1.1.10 mrg gomp_mutex_unlock (&devicep->lock); 714 1.1.1.10 mrg gomp_fatal ("Pointer target of array section wasn't mapped"); 715 1.1.1.10 mrg } 716 1.1.1.10 mrg } 717 1.1.1.10 mrg else 718 1.1.1.10 mrg { 719 1.1.1.10 mrg cur_node.host_start -= n->host_start; 720 1.1.1.10 mrg cur_node.tgt_offset 721 1.1.1.10 mrg = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; 722 1.1.1.10 mrg /* At this point tgt_offset is target address of the 723 1.1.1.10 mrg array section. Now subtract bias to get what we want 724 1.1.1.10 mrg to initialize the pointer with. */ 725 1.1.1.10 mrg cur_node.tgt_offset -= bias; 726 1.1.1.2 mrg } 727 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), 728 1.1.1.10 mrg (void *) &cur_node.tgt_offset, sizeof (void *), 729 1.1.1.10 mrg true, cbuf); 730 1.1.1.2 mrg } 731 1.1.1.2 mrg 732 1.1.1.2 mrg static void 733 1.1.1.8 mrg gomp_map_fields_existing (struct target_mem_desc *tgt, 734 1.1.1.8 mrg struct goacc_asyncqueue *aq, splay_tree_key n, 735 1.1.1.2 mrg size_t first, size_t i, void **hostaddrs, 736 1.1.1.6 mrg size_t *sizes, void *kinds, 737 1.1.1.10 mrg struct gomp_coalesce_buf *cbuf, htab_t *refcount_set) 738 1.1 mrg { 739 1.1.1.2 mrg struct gomp_device_descr *devicep = tgt->device_descr; 740 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map; 741 1.1.1.2 mrg struct splay_tree_key_s cur_node; 742 1.1.1.2 mrg int kind; 743 1.1.1.10 mrg bool implicit; 744 1.1.1.2 mrg const bool short_mapkind = true; 745 1.1.1.2 mrg const int typemask = short_mapkind ? 
0xff : 0x7; 746 1.1.1.2 mrg 747 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 748 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizes[i]; 749 1.1.1.11 mrg splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node); 750 1.1.1.2 mrg kind = get_kind (short_mapkind, kinds, i); 751 1.1.1.10 mrg implicit = get_implicit (short_mapkind, kinds, i); 752 1.1.1.2 mrg if (n2 753 1.1.1.2 mrg && n2->tgt == n->tgt 754 1.1.1.2 mrg && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 755 1.1.1.2 mrg { 756 1.1.1.10 mrg gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], 757 1.1.1.10 mrg kind & typemask, false, implicit, cbuf, 758 1.1.1.10 mrg refcount_set); 759 1.1.1.2 mrg return; 760 1.1.1.2 mrg } 761 1.1.1.2 mrg if (sizes[i] == 0) 762 1.1.1.2 mrg { 763 1.1.1.2 mrg if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) 764 1.1.1.2 mrg { 765 1.1.1.2 mrg cur_node.host_start--; 766 1.1.1.2 mrg n2 = splay_tree_lookup (mem_map, &cur_node); 767 1.1.1.2 mrg cur_node.host_start++; 768 1.1.1.2 mrg if (n2 769 1.1.1.2 mrg && n2->tgt == n->tgt 770 1.1.1.2 mrg && n2->host_start - n->host_start 771 1.1.1.2 mrg == n2->tgt_offset - n->tgt_offset) 772 1.1.1.2 mrg { 773 1.1.1.10 mrg gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], 774 1.1.1.10 mrg kind & typemask, false, implicit, cbuf, 775 1.1.1.10 mrg refcount_set); 776 1.1.1.2 mrg return; 777 1.1.1.2 mrg } 778 1.1.1.2 mrg } 779 1.1.1.2 mrg cur_node.host_end++; 780 1.1.1.2 mrg n2 = splay_tree_lookup (mem_map, &cur_node); 781 1.1.1.2 mrg cur_node.host_end--; 782 1.1.1.2 mrg if (n2 783 1.1.1.2 mrg && n2->tgt == n->tgt 784 1.1.1.2 mrg && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 785 1.1.1.2 mrg { 786 1.1.1.8 mrg gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], 787 1.1.1.10 mrg kind & typemask, false, implicit, cbuf, 788 1.1.1.10 mrg refcount_set); 789 1.1.1.2 mrg return; 790 1.1.1.2 mrg } 791 1.1.1.2 mrg } 792 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 793 1.1.1.2 mrg gomp_fatal ("Trying to map into device [%p..%p) structure element when " 794 1.1.1.2 mrg "other mapped elements from the same structure weren't mapped " 795 1.1.1.2 mrg "together with it", (void *) cur_node.host_start, 796 1.1.1.2 mrg (void *) cur_node.host_end); 797 1.1.1.2 mrg } 798 1.1.1.2 mrg 799 1.1.1.8 mrg attribute_hidden void 800 1.1.1.8 mrg gomp_attach_pointer (struct gomp_device_descr *devicep, 801 1.1.1.8 mrg struct goacc_asyncqueue *aq, splay_tree mem_map, 802 1.1.1.8 mrg splay_tree_key n, uintptr_t attach_to, size_t bias, 803 1.1.1.10 mrg struct gomp_coalesce_buf *cbufp, 804 1.1.1.10 mrg bool allow_zero_length_array_sections) 805 1.1.1.8 mrg { 806 1.1.1.8 mrg struct splay_tree_key_s s; 807 1.1.1.8 mrg size_t size, idx; 808 1.1.1.8 mrg 809 1.1.1.8 mrg if (n == NULL) 810 1.1.1.8 mrg { 811 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 812 1.1.1.8 mrg gomp_fatal ("enclosing struct not mapped for attach"); 813 1.1.1.8 mrg } 814 1.1.1.8 mrg 815 1.1.1.8 mrg size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *); 816 1.1.1.8 mrg /* We might have a pointer in a packed struct: however we cannot have more 817 1.1.1.8 mrg than one such pointer in each pointer-sized portion of the struct, so 818 1.1.1.8 mrg this is safe. 
*/ 819 1.1.1.8 mrg idx = (attach_to - n->host_start) / sizeof (void *); 820 1.1.1.8 mrg 821 1.1.1.8 mrg if (!n->aux) 822 1.1.1.8 mrg n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); 823 1.1.1.8 mrg 824 1.1.1.8 mrg if (!n->aux->attach_count) 825 1.1.1.8 mrg n->aux->attach_count 826 1.1.1.8 mrg = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size); 827 1.1.1.8 mrg 828 1.1.1.8 mrg if (n->aux->attach_count[idx] < UINTPTR_MAX) 829 1.1.1.8 mrg n->aux->attach_count[idx]++; 830 1.1.1.8 mrg else 831 1.1.1.8 mrg { 832 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 833 1.1.1.8 mrg gomp_fatal ("attach count overflow"); 834 1.1.1.8 mrg } 835 1.1.1.8 mrg 836 1.1.1.8 mrg if (n->aux->attach_count[idx] == 1) 837 1.1.1.8 mrg { 838 1.1.1.8 mrg uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to 839 1.1.1.8 mrg - n->host_start; 840 1.1.1.8 mrg uintptr_t target = (uintptr_t) *(void **) attach_to; 841 1.1.1.8 mrg splay_tree_key tn; 842 1.1.1.8 mrg uintptr_t data; 843 1.1.1.8 mrg 844 1.1.1.8 mrg if ((void *) target == NULL) 845 1.1.1.8 mrg { 846 1.1.1.11 mrg /* As a special case, allow attaching NULL host pointers. This 847 1.1.1.11 mrg allows e.g. unassociated Fortran pointers to be mapped 848 1.1.1.11 mrg properly. */ 849 1.1.1.11 mrg data = 0; 850 1.1.1.11 mrg 851 1.1.1.11 mrg gomp_debug (1, 852 1.1.1.11 mrg "%s: attaching NULL host pointer, target %p " 853 1.1.1.11 mrg "(struct base %p)\n", __FUNCTION__, (void *) devptr, 854 1.1.1.11 mrg (void *) (n->tgt->tgt_start + n->tgt_offset)); 855 1.1.1.11 mrg 856 1.1.1.11 mrg gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, 857 1.1.1.11 mrg sizeof (void *), true, cbufp); 858 1.1.1.11 mrg 859 1.1.1.11 mrg return; 860 1.1.1.8 mrg } 861 1.1.1.8 mrg 862 1.1.1.8 mrg s.host_start = target + bias; 863 1.1.1.8 mrg s.host_end = s.host_start + 1; 864 1.1.1.8 mrg tn = splay_tree_lookup (mem_map, &s); 865 1.1.1.8 mrg 866 1.1.1.8 mrg if (!tn) 867 1.1.1.8 mrg { 868 1.1.1.10 mrg if (allow_zero_length_array_sections) 869 1.1.1.10 mrg /* When allowing attachment to zero-length array sections, we 870 1.1.1.11 mrg copy the host pointer when the target region is not mapped. 
*/ 871 1.1.1.11 mrg data = target; 872 1.1.1.10 mrg else 873 1.1.1.10 mrg { 874 1.1.1.10 mrg gomp_mutex_unlock (&devicep->lock); 875 1.1.1.10 mrg gomp_fatal ("pointer target not mapped for attach"); 876 1.1.1.10 mrg } 877 1.1.1.8 mrg } 878 1.1.1.10 mrg else 879 1.1.1.10 mrg data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; 880 1.1.1.8 mrg 881 1.1.1.8 mrg gomp_debug (1, 882 1.1.1.8 mrg "%s: attaching host %p, target %p (struct base %p) to %p\n", 883 1.1.1.8 mrg __FUNCTION__, (void *) attach_to, (void *) devptr, 884 1.1.1.8 mrg (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); 885 1.1.1.8 mrg 886 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, 887 1.1.1.10 mrg sizeof (void *), true, cbufp); 888 1.1.1.8 mrg } 889 1.1.1.8 mrg else 890 1.1.1.8 mrg gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, 891 1.1.1.8 mrg (void *) attach_to, (int) n->aux->attach_count[idx]); 892 1.1.1.8 mrg } 893 1.1.1.8 mrg 894 1.1.1.8 mrg attribute_hidden void 895 1.1.1.8 mrg gomp_detach_pointer (struct gomp_device_descr *devicep, 896 1.1.1.8 mrg struct goacc_asyncqueue *aq, splay_tree_key n, 897 1.1.1.8 mrg uintptr_t detach_from, bool finalize, 898 1.1.1.8 mrg struct gomp_coalesce_buf *cbufp) 899 1.1.1.8 mrg { 900 1.1.1.8 mrg size_t idx; 901 1.1.1.8 mrg 902 1.1.1.8 mrg if (n == NULL) 903 1.1.1.8 mrg { 904 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 905 1.1.1.8 mrg gomp_fatal ("enclosing struct not mapped for detach"); 906 1.1.1.8 mrg } 907 1.1.1.8 mrg 908 1.1.1.8 mrg idx = (detach_from - n->host_start) / sizeof (void *); 909 1.1.1.8 mrg 910 1.1.1.8 mrg if (!n->aux || !n->aux->attach_count) 911 1.1.1.8 mrg { 912 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 913 1.1.1.8 mrg gomp_fatal ("no attachment counters for struct"); 914 1.1.1.8 mrg } 915 1.1.1.8 mrg 916 1.1.1.8 mrg if (finalize) 917 1.1.1.8 mrg n->aux->attach_count[idx] = 1; 918 1.1.1.8 mrg 919 1.1.1.8 mrg if (n->aux->attach_count[idx] == 0) 920 1.1.1.8 mrg { 921 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 922 1.1.1.8 mrg gomp_fatal ("attach count underflow"); 923 1.1.1.8 mrg } 924 1.1.1.8 mrg else 925 1.1.1.8 mrg n->aux->attach_count[idx]--; 926 1.1.1.8 mrg 927 1.1.1.8 mrg if (n->aux->attach_count[idx] == 0) 928 1.1.1.8 mrg { 929 1.1.1.8 mrg uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from 930 1.1.1.8 mrg - n->host_start; 931 1.1.1.8 mrg uintptr_t target = (uintptr_t) *(void **) detach_from; 932 1.1.1.8 mrg 933 1.1.1.8 mrg gomp_debug (1, 934 1.1.1.8 mrg "%s: detaching host %p, target %p (struct base %p) to %p\n", 935 1.1.1.8 mrg __FUNCTION__, (void *) detach_from, (void *) devptr, 936 1.1.1.8 mrg (void *) (n->tgt->tgt_start + n->tgt_offset), 937 1.1.1.8 mrg (void *) target); 938 1.1.1.8 mrg 939 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target, 940 1.1.1.10 mrg sizeof (void *), true, cbufp); 941 1.1.1.8 mrg } 942 1.1.1.8 mrg else 943 1.1.1.8 mrg gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, 944 1.1.1.8 mrg (void *) detach_from, (int) n->aux->attach_count[idx]); 945 1.1.1.8 mrg } 946 1.1.1.8 mrg 947 1.1.1.8 mrg attribute_hidden uintptr_t 948 1.1.1.2 mrg gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) 949 1.1.1.2 mrg { 950 1.1.1.2 mrg if (tgt->list[i].key != NULL) 951 1.1.1.2 mrg return tgt->list[i].key->tgt->tgt_start 952 1.1.1.2 mrg + tgt->list[i].key->tgt_offset 953 1.1.1.2 mrg + tgt->list[i].offset; 954 1.1.1.8 mrg 955 1.1.1.8 mrg switch (tgt->list[i].offset) 956 1.1.1.8 mrg { 957 1.1.1.8 
mrg case OFFSET_INLINED: 958 1.1.1.8 mrg return (uintptr_t) hostaddrs[i]; 959 1.1.1.8 mrg 960 1.1.1.8 mrg case OFFSET_POINTER: 961 1.1.1.8 mrg return 0; 962 1.1.1.8 mrg 963 1.1.1.8 mrg case OFFSET_STRUCT: 964 1.1.1.8 mrg return tgt->list[i + 1].key->tgt->tgt_start 965 1.1.1.8 mrg + tgt->list[i + 1].key->tgt_offset 966 1.1.1.8 mrg + tgt->list[i + 1].offset 967 1.1.1.8 mrg + (uintptr_t) hostaddrs[i] 968 1.1.1.8 mrg - (uintptr_t) hostaddrs[i + 1]; 969 1.1.1.8 mrg 970 1.1.1.8 mrg default: 971 1.1.1.8 mrg return tgt->tgt_start + tgt->list[i].offset; 972 1.1.1.8 mrg } 973 1.1 mrg } 974 1.1 mrg 975 1.1.1.8 mrg static inline __attribute__((always_inline)) struct target_mem_desc * 976 1.1.1.8 mrg gomp_map_vars_internal (struct gomp_device_descr *devicep, 977 1.1.1.8 mrg struct goacc_asyncqueue *aq, size_t mapnum, 978 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes, 979 1.1.1.8 mrg void *kinds, bool short_mapkind, 980 1.1.1.10 mrg htab_t *refcount_set, 981 1.1.1.8 mrg enum gomp_map_vars_kind pragma_kind) 982 1.1 mrg { 983 1.1 mrg size_t i, tgt_align, tgt_size, not_found_cnt = 0; 984 1.1.1.2 mrg bool has_firstprivate = false; 985 1.1.1.10 mrg bool has_always_ptrset = false; 986 1.1.1.10 mrg bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0; 987 1.1.1.2 mrg const int rshift = short_mapkind ? 8 : 3; 988 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7; 989 1.1 mrg struct splay_tree_s *mem_map = &devicep->mem_map; 990 1.1 mrg struct splay_tree_key_s cur_node; 991 1.1 mrg struct target_mem_desc *tgt 992 1.1 mrg = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); 993 1.1 mrg tgt->list_count = mapnum; 994 1.1.1.10 mrg tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1; 995 1.1 mrg tgt->device_descr = devicep; 996 1.1.1.8 mrg tgt->prev = NULL; 997 1.1.1.6 mrg struct gomp_coalesce_buf cbuf, *cbufp = NULL; 998 1.1 mrg 999 1.1 mrg if (mapnum == 0) 1000 1.1.1.2 mrg { 1001 1.1.1.2 mrg tgt->tgt_start = 0; 1002 1.1.1.2 mrg tgt->tgt_end = 0; 1003 1.1.1.2 mrg return tgt; 1004 1.1.1.2 mrg } 1005 1.1 mrg 1006 1.1 mrg tgt_align = sizeof (void *); 1007 1.1 mrg tgt_size = 0; 1008 1.1.1.6 mrg cbuf.chunks = NULL; 1009 1.1.1.6 mrg cbuf.chunk_cnt = -1; 1010 1.1.1.6 mrg cbuf.use_cnt = 0; 1011 1.1.1.6 mrg cbuf.buf = NULL; 1012 1.1.1.11 mrg if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET)) 1013 1.1.1.6 mrg { 1014 1.1.1.7 mrg size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); 1015 1.1.1.7 mrg cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); 1016 1.1.1.6 mrg cbuf.chunk_cnt = 0; 1017 1.1.1.6 mrg } 1018 1.1.1.11 mrg if (pragma_kind & GOMP_MAP_VARS_TARGET) 1019 1.1 mrg { 1020 1.1 mrg size_t align = 4 * sizeof (void *); 1021 1.1 mrg tgt_align = align; 1022 1.1 mrg tgt_size = mapnum * sizeof (void *); 1023 1.1.1.6 mrg cbuf.chunk_cnt = 1; 1024 1.1.1.6 mrg cbuf.use_cnt = 1 + (mapnum > 1); 1025 1.1.1.7 mrg cbuf.chunks[0].start = 0; 1026 1.1.1.7 mrg cbuf.chunks[0].end = tgt_size; 1027 1.1 mrg } 1028 1.1 mrg 1029 1.1 mrg gomp_mutex_lock (&devicep->lock); 1030 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED) 1031 1.1.1.2 mrg { 1032 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 1033 1.1.1.2 mrg free (tgt); 1034 1.1.1.2 mrg return NULL; 1035 1.1.1.2 mrg } 1036 1.1 mrg 1037 1.1 mrg for (i = 0; i < mapnum; i++) 1038 1.1 mrg { 1039 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i); 1040 1.1.1.10 mrg bool implicit = get_implicit (short_mapkind, kinds, i); 1041 1.1.1.2 mrg if (hostaddrs[i] == NULL 1042 1.1.1.2 mrg || 
(kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) 1043 1.1.1.2 mrg { 1044 1.1.1.2 mrg tgt->list[i].key = NULL; 1045 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED; 1046 1.1.1.2 mrg continue; 1047 1.1.1.2 mrg } 1048 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR 1049 1.1.1.8 mrg || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 1050 1.1.1.2 mrg { 1051 1.1.1.8 mrg tgt->list[i].key = NULL; 1052 1.1.1.8 mrg if (!not_found_cnt) 1053 1.1.1.2 mrg { 1054 1.1.1.8 mrg /* In OpenMP < 5.0 and OpenACC the mapping has to be done 1055 1.1.1.8 mrg on a separate construct prior to using use_device_{addr,ptr}. 1056 1.1.1.8 mrg In OpenMP 5.0, map directives need to be ordered by the 1057 1.1.1.8 mrg middle-end before the use_device_* clauses. If 1058 1.1.1.8 mrg !not_found_cnt, all mappings requested (if any) are already 1059 1.1.1.8 mrg mapped, so use_device_{addr,ptr} can be resolved right away. 1060 1.1.1.8 mrg Otherwise, if not_found_cnt, gomp_map_lookup might fail 1061 1.1.1.8 mrg now but would succeed after performing the mappings in the 1062 1.1.1.8 mrg following loop. We can't defer this always to the second 1063 1.1.1.8 mrg loop, because it is not even invoked when !not_found_cnt 1064 1.1.1.8 mrg after the first loop. */ 1065 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 1066 1.1.1.8 mrg cur_node.host_end = cur_node.host_start; 1067 1.1.1.8 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 1068 1.1.1.8 mrg if (n != NULL) 1069 1.1.1.8 mrg { 1070 1.1.1.8 mrg cur_node.host_start -= n->host_start; 1071 1.1.1.8 mrg hostaddrs[i] 1072 1.1.1.8 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset 1073 1.1.1.8 mrg + cur_node.host_start); 1074 1.1.1.8 mrg } 1075 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) 1076 1.1.1.8 mrg { 1077 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 1078 1.1.1.8 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped"); 1079 1.1.1.8 mrg } 1080 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 1081 1.1.1.8 mrg /* If not present, continue using the host address. 
*/ 1082 1.1.1.8 mrg ; 1083 1.1.1.8 mrg else 1084 1.1.1.8 mrg __builtin_unreachable (); 1085 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED; 1086 1.1.1.2 mrg } 1087 1.1.1.8 mrg else 1088 1.1.1.8 mrg tgt->list[i].offset = 0; 1089 1.1.1.2 mrg continue; 1090 1.1.1.2 mrg } 1091 1.1.1.11 mrg else if ((kind & typemask) == GOMP_MAP_STRUCT 1092 1.1.1.11 mrg || (kind & typemask) == GOMP_MAP_STRUCT_UNORD) 1093 1.1.1.2 mrg { 1094 1.1.1.2 mrg size_t first = i + 1; 1095 1.1.1.2 mrg size_t last = i + sizes[i]; 1096 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 1097 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last] 1098 1.1.1.2 mrg + sizes[last]; 1099 1.1.1.2 mrg tgt->list[i].key = NULL; 1100 1.1.1.8 mrg tgt->list[i].offset = OFFSET_STRUCT; 1101 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); 1102 1.1.1.2 mrg if (n == NULL) 1103 1.1.1.2 mrg { 1104 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift); 1105 1.1.1.2 mrg if (tgt_align < align) 1106 1.1.1.2 mrg tgt_align = align; 1107 1.1.1.6 mrg tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start; 1108 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1); 1109 1.1.1.6 mrg tgt_size += cur_node.host_end - cur_node.host_start; 1110 1.1.1.2 mrg not_found_cnt += last - i; 1111 1.1.1.2 mrg for (i = first; i <= last; i++) 1112 1.1.1.6 mrg { 1113 1.1.1.6 mrg tgt->list[i].key = NULL; 1114 1.1.1.10 mrg if (!aq 1115 1.1.1.10 mrg && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) 1116 1.1.1.11 mrg & typemask) 1117 1.1.1.11 mrg && sizes[i] != 0) 1118 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf, 1119 1.1.1.6 mrg tgt_size - cur_node.host_end 1120 1.1.1.6 mrg + (uintptr_t) hostaddrs[i], 1121 1.1.1.6 mrg sizes[i]); 1122 1.1.1.6 mrg } 1123 1.1.1.2 mrg i--; 1124 1.1.1.2 mrg continue; 1125 1.1.1.2 mrg } 1126 1.1.1.2 mrg for (i = first; i <= last; i++) 1127 1.1.1.8 mrg gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, 1128 1.1.1.10 mrg sizes, kinds, NULL, refcount_set); 1129 1.1.1.2 mrg i--; 1130 1.1.1.2 mrg continue; 1131 1.1.1.2 mrg } 1132 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) 1133 1.1 mrg { 1134 1.1.1.2 mrg tgt->list[i].key = NULL; 1135 1.1.1.8 mrg tgt->list[i].offset = OFFSET_POINTER; 1136 1.1.1.8 mrg has_firstprivate = true; 1137 1.1.1.8 mrg continue; 1138 1.1.1.8 mrg } 1139 1.1.1.10 mrg else if ((kind & typemask) == GOMP_MAP_ATTACH 1140 1.1.1.10 mrg || ((kind & typemask) 1141 1.1.1.10 mrg == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)) 1142 1.1.1.8 mrg { 1143 1.1.1.8 mrg tgt->list[i].key = NULL; 1144 1.1.1.2 mrg has_firstprivate = true; 1145 1.1 mrg continue; 1146 1.1 mrg } 1147 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 1148 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask)) 1149 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i]; 1150 1.1 mrg else 1151 1.1 mrg cur_node.host_end = cur_node.host_start + sizeof (void *); 1152 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) 1153 1.1.1.2 mrg { 1154 1.1.1.2 mrg tgt->list[i].key = NULL; 1155 1.1.1.2 mrg 1156 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift); 1157 1.1.1.2 mrg if (tgt_align < align) 1158 1.1.1.2 mrg tgt_align = align; 1159 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1); 1160 1.1.1.10 mrg if (!aq) 1161 1.1.1.10 mrg gomp_coalesce_buf_add (&cbuf, tgt_size, 1162 1.1.1.10 mrg cur_node.host_end - cur_node.host_start); 1163 1.1.1.2 mrg tgt_size += cur_node.host_end - cur_node.host_start; 1164 1.1.1.2 mrg has_firstprivate = true; 1165 1.1.1.2 mrg 
continue;
1166 1.1.1.2 mrg 	}
1167 1.1.1.2 mrg       splay_tree_key n;
1168 1.1.1.2 mrg       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1169 1.1 mrg 	{
1170 1.1.1.2 mrg 	  n = gomp_map_0len_lookup (mem_map, &cur_node);
1171 1.1.1.2 mrg 	  if (!n)
1172 1.1.1.2 mrg 	    {
1173 1.1.1.2 mrg 	      tgt->list[i].key = NULL;
1174 1.1.1.11 mrg 	      tgt->list[i].offset = OFFSET_INLINED;
1175 1.1.1.2 mrg 	      continue;
1176 1.1.1.2 mrg 	    }
1177 1.1 mrg 	}
1178 1.1 mrg       else
1179 1.1.1.2 mrg 	n = splay_tree_lookup (mem_map, &cur_node);
1180 1.1.1.2 mrg       if (n && n->refcount != REFCOUNT_LINK)
1181 1.1.1.10 mrg 	{
1182 1.1.1.10 mrg 	  int always_to_cnt = 0;
1183 1.1.1.10 mrg 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
1184 1.1.1.10 mrg 	    {
1185 1.1.1.10 mrg 	      bool has_nullptr = false;
1186 1.1.1.10 mrg 	      size_t j;
1187 1.1.1.10 mrg 	      for (j = 0; j < n->tgt->list_count; j++)
1188 1.1.1.10 mrg 		if (n->tgt->list[j].key == n)
1189 1.1.1.10 mrg 		  {
1190 1.1.1.10 mrg 		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1191 1.1.1.10 mrg 		    break;
1192 1.1.1.10 mrg 		  }
1193 1.1.1.10 mrg 	      if (n->tgt->list_count == 0)
1194 1.1.1.10 mrg 		{
1195 1.1.1.10 mrg 		  /* 'declare target'; assume has_nullptr; it could also be
1196 1.1.1.10 mrg 		     a statically assigned pointer, but then it should point
1197 1.1.1.10 mrg 		     to the equivalent variable on the host.  */
1198 1.1.1.10 mrg 		  assert (n->refcount == REFCOUNT_INFINITY);
1199 1.1.1.10 mrg 		  has_nullptr = true;
1200 1.1.1.10 mrg 		}
1201 1.1.1.10 mrg 	      else
1202 1.1.1.10 mrg 		assert (j < n->tgt->list_count);
1203 1.1.1.10 mrg 	      /* Re-map the data if there is an 'always' modifier, or if a
1204 1.1.1.10 mrg 		 null pointer was there and a non-null one has now been found;
1205 1.1.1.10 mrg 		 that permits transparent re-mapping for Fortran array
1206 1.1.1.10 mrg 		 descriptors which were previously mapped unallocated.  */
1207 1.1.1.10 mrg 	      for (j = i + 1; j < mapnum; j++)
1208 1.1.1.10 mrg 		{
1209 1.1.1.10 mrg 		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1210 1.1.1.10 mrg 		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1211 1.1.1.10 mrg 		      && (!has_nullptr
1212 1.1.1.10 mrg 			  || !GOMP_MAP_POINTER_P (ptr_kind)
1213 1.1.1.10 mrg 			  || *(void **) hostaddrs[j] == NULL))
1214 1.1.1.10 mrg 		    break;
1215 1.1.1.10 mrg 		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1216 1.1.1.10 mrg 			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1217 1.1.1.10 mrg 			       > cur_node.host_end))
1218 1.1.1.10 mrg 		    break;
1219 1.1.1.10 mrg 		  else
1220 1.1.1.10 mrg 		    {
1221 1.1.1.10 mrg 		      has_always_ptrset = true;
1222 1.1.1.10 mrg 		      ++always_to_cnt;
1223 1.1.1.10 mrg 		    }
1224 1.1.1.10 mrg 		}
1225 1.1.1.10 mrg 	    }
1226 1.1.1.10 mrg 	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1227 1.1.1.10 mrg 				  kind & typemask, always_to_cnt > 0, implicit,
1228 1.1.1.10 mrg 				  NULL, refcount_set);
1229 1.1.1.10 mrg 	  i += always_to_cnt;
1230 1.1.1.10 mrg 	}
1231 1.1.1.2 mrg       else
1232 1.1 mrg 	{
1233 1.1.1.2 mrg 	  tgt->list[i].key = NULL;
1234 1.1 mrg 
1235 1.1.1.8 mrg 	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1236 1.1.1.8 mrg 	    {
1237 1.1.1.8 mrg 	      /* Not present, hence, skip entry - including its MAP_POINTER,
1238 1.1.1.8 mrg 		 if any.
*/ 1239 1.1.1.11 mrg tgt->list[i].offset = OFFSET_INLINED; 1240 1.1.1.8 mrg if (i + 1 < mapnum 1241 1.1.1.8 mrg && ((typemask & get_kind (short_mapkind, kinds, i + 1)) 1242 1.1.1.8 mrg == GOMP_MAP_POINTER)) 1243 1.1.1.8 mrg { 1244 1.1.1.8 mrg ++i; 1245 1.1.1.8 mrg tgt->list[i].key = NULL; 1246 1.1.1.8 mrg tgt->list[i].offset = 0; 1247 1.1.1.8 mrg } 1248 1.1.1.8 mrg continue; 1249 1.1.1.8 mrg } 1250 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift); 1251 1.1 mrg not_found_cnt++; 1252 1.1 mrg if (tgt_align < align) 1253 1.1 mrg tgt_align = align; 1254 1.1 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1); 1255 1.1.1.10 mrg if (!aq 1256 1.1.1.10 mrg && gomp_to_device_kind_p (kind & typemask)) 1257 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf, tgt_size, 1258 1.1.1.6 mrg cur_node.host_end - cur_node.host_start); 1259 1.1 mrg tgt_size += cur_node.host_end - cur_node.host_start; 1260 1.1 mrg if ((kind & typemask) == GOMP_MAP_TO_PSET) 1261 1.1 mrg { 1262 1.1 mrg size_t j; 1263 1.1.1.10 mrg int kind; 1264 1.1 mrg for (j = i + 1; j < mapnum; j++) 1265 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind, 1266 1.1.1.10 mrg kinds, j)) & typemask)) 1267 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (kind)) 1268 1.1 mrg break; 1269 1.1 mrg else if ((uintptr_t) hostaddrs[j] < cur_node.host_start 1270 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1271 1.1 mrg > cur_node.host_end)) 1272 1.1 mrg break; 1273 1.1 mrg else 1274 1.1 mrg { 1275 1.1.1.2 mrg tgt->list[j].key = NULL; 1276 1.1 mrg i++; 1277 1.1 mrg } 1278 1.1 mrg } 1279 1.1 mrg } 1280 1.1 mrg } 1281 1.1 mrg 1282 1.1 mrg if (devaddrs) 1283 1.1 mrg { 1284 1.1 mrg if (mapnum != 1) 1285 1.1 mrg { 1286 1.1 mrg gomp_mutex_unlock (&devicep->lock); 1287 1.1 mrg gomp_fatal ("unexpected aggregation"); 1288 1.1 mrg } 1289 1.1 mrg tgt->to_free = devaddrs[0]; 1290 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free; 1291 1.1 mrg tgt->tgt_end = tgt->tgt_start + sizes[0]; 1292 1.1 mrg } 1293 1.1.1.11 mrg else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET)) 1294 1.1 mrg { 1295 1.1 mrg /* Allocate tgt_align aligned tgt_size block of memory. */ 1296 1.1 mrg /* FIXME: Perhaps change interface to allocate properly aligned 1297 1.1 mrg memory. 
*/ 1298 1.1 mrg tgt->to_free = devicep->alloc_func (devicep->target_id, 1299 1.1 mrg tgt_size + tgt_align - 1); 1300 1.1.1.3 mrg if (!tgt->to_free) 1301 1.1.1.3 mrg { 1302 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock); 1303 1.1.1.3 mrg gomp_fatal ("device memory allocation fail"); 1304 1.1.1.3 mrg } 1305 1.1.1.3 mrg 1306 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free; 1307 1.1 mrg tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); 1308 1.1 mrg tgt->tgt_end = tgt->tgt_start + tgt_size; 1309 1.1.1.6 mrg 1310 1.1.1.6 mrg if (cbuf.use_cnt == 1) 1311 1.1.1.6 mrg cbuf.chunk_cnt--; 1312 1.1.1.6 mrg if (cbuf.chunk_cnt > 0) 1313 1.1.1.6 mrg { 1314 1.1.1.6 mrg cbuf.buf 1315 1.1.1.7 mrg = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start); 1316 1.1.1.6 mrg if (cbuf.buf) 1317 1.1.1.6 mrg { 1318 1.1.1.6 mrg cbuf.tgt = tgt; 1319 1.1.1.6 mrg cbufp = &cbuf; 1320 1.1.1.6 mrg } 1321 1.1.1.6 mrg } 1322 1.1 mrg } 1323 1.1 mrg else 1324 1.1 mrg { 1325 1.1 mrg tgt->to_free = NULL; 1326 1.1 mrg tgt->tgt_start = 0; 1327 1.1 mrg tgt->tgt_end = 0; 1328 1.1 mrg } 1329 1.1 mrg 1330 1.1 mrg tgt_size = 0; 1331 1.1.1.11 mrg if (pragma_kind & GOMP_MAP_VARS_TARGET) 1332 1.1 mrg tgt_size = mapnum * sizeof (void *); 1333 1.1 mrg 1334 1.1 mrg tgt->array = NULL; 1335 1.1.1.10 mrg if (not_found_cnt || has_firstprivate || has_always_ptrset) 1336 1.1 mrg { 1337 1.1.1.2 mrg if (not_found_cnt) 1338 1.1.1.2 mrg tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); 1339 1.1 mrg splay_tree_node array = tgt->array; 1340 1.1.1.10 mrg size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY; 1341 1.1.1.2 mrg uintptr_t field_tgt_base = 0; 1342 1.1.1.10 mrg splay_tree_key field_tgt_structelem_first = NULL; 1343 1.1 mrg 1344 1.1 mrg for (i = 0; i < mapnum; i++) 1345 1.1.1.10 mrg if (has_always_ptrset 1346 1.1.1.10 mrg && tgt->list[i].key 1347 1.1.1.10 mrg && (get_kind (short_mapkind, kinds, i) & typemask) 1348 1.1.1.10 mrg == GOMP_MAP_TO_PSET) 1349 1.1.1.10 mrg { 1350 1.1.1.10 mrg splay_tree_key k = tgt->list[i].key; 1351 1.1.1.10 mrg bool has_nullptr = false; 1352 1.1.1.10 mrg size_t j; 1353 1.1.1.10 mrg for (j = 0; j < k->tgt->list_count; j++) 1354 1.1.1.10 mrg if (k->tgt->list[j].key == k) 1355 1.1.1.10 mrg { 1356 1.1.1.10 mrg has_nullptr = k->tgt->list[j].has_null_ptr_assoc; 1357 1.1.1.10 mrg break; 1358 1.1.1.10 mrg } 1359 1.1.1.10 mrg if (k->tgt->list_count == 0) 1360 1.1.1.10 mrg has_nullptr = true; 1361 1.1.1.10 mrg else 1362 1.1.1.10 mrg assert (j < k->tgt->list_count); 1363 1.1.1.10 mrg 1364 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc = false; 1365 1.1.1.10 mrg for (j = i + 1; j < mapnum; j++) 1366 1.1.1.10 mrg { 1367 1.1.1.10 mrg int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask; 1368 1.1.1.10 mrg if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind) 1369 1.1.1.10 mrg && (!has_nullptr 1370 1.1.1.10 mrg || !GOMP_MAP_POINTER_P (ptr_kind) 1371 1.1.1.10 mrg || *(void **) hostaddrs[j] == NULL)) 1372 1.1.1.10 mrg break; 1373 1.1.1.10 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start 1374 1.1.1.10 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1375 1.1.1.10 mrg > k->host_end)) 1376 1.1.1.10 mrg break; 1377 1.1.1.10 mrg else 1378 1.1.1.10 mrg { 1379 1.1.1.10 mrg if (*(void **) hostaddrs[j] == NULL) 1380 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc = true; 1381 1.1.1.10 mrg tgt->list[j].key = k; 1382 1.1.1.10 mrg tgt->list[j].copy_from = false; 1383 1.1.1.10 mrg tgt->list[j].always_copy_from = false; 1384 1.1.1.10 mrg tgt->list[j].is_attach = false; 1385 1.1.1.10 mrg 
gomp_increment_refcount (k, refcount_set);
1386 1.1.1.10 mrg gomp_map_pointer (k->tgt, aq,
1387 1.1.1.10 mrg (uintptr_t) *(void **) hostaddrs[j],
1388 1.1.1.10 mrg k->tgt_offset + ((uintptr_t) hostaddrs[j]
1389 1.1.1.10 mrg - k->host_start),
1390 1.1.1.10 mrg sizes[j], cbufp, false);
1391 1.1.1.10 mrg }
1392 1.1.1.10 mrg }
1393 1.1.1.10 mrg i = j - 1;
1394 1.1.1.10 mrg }
1395 1.1.1.10 mrg else if (tgt->list[i].key == NULL)
1396 1.1 mrg {
1397 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
1398 1.1.1.10 mrg bool implicit = get_implicit (short_mapkind, kinds, i);
1399 1.1 mrg if (hostaddrs[i] == NULL)
1400 1.1 mrg continue;
1401 1.1.1.2 mrg switch (kind & typemask)
1402 1.1.1.2 mrg {
1403 1.1.1.2 mrg size_t align, len, first, last;
1404 1.1.1.2 mrg splay_tree_key n;
1405 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE:
1406 1.1.1.2 mrg align = (size_t) 1 << (kind >> rshift);
1407 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1408 1.1.1.2 mrg tgt->list[i].offset = tgt_size;
1409 1.1.1.2 mrg len = sizes[i];
1410 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1411 1.1.1.3 mrg (void *) (tgt->tgt_start + tgt_size),
1412 1.1.1.10 mrg (void *) hostaddrs[i], len, false, cbufp);
1413 1.1.1.11 mrg /* Save device address in hostaddr to permit later availability
1414 1.1.1.11 mrg when doing a deep-firstprivate with pointer attach. */
1415 1.1.1.11 mrg hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1416 1.1.1.2 mrg tgt_size += len;
1417 1.1.1.11 mrg 
1418 1.1.1.11 mrg /* If followed by GOMP_MAP_ATTACH, pointer assign this
1419 1.1.1.11 mrg firstprivate to hostaddrs[i+1], which is assumed to contain a
1420 1.1.1.11 mrg device address. */
1421 1.1.1.11 mrg if (i + 1 < mapnum
1422 1.1.1.11 mrg && (GOMP_MAP_ATTACH
1423 1.1.1.11 mrg == (typemask & get_kind (short_mapkind, kinds, i+1))))
1424 1.1.1.11 mrg {
1425 1.1.1.11 mrg uintptr_t target = (uintptr_t) hostaddrs[i];
1426 1.1.1.11 mrg void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1427 1.1.1.11 mrg /* Per
1428 1.1.1.11 mrg <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1429 1.1.1.11 mrg "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1430 1.1.1.11 mrg this probably needs revision for 'aq' usage. */
1431 1.1.1.11 mrg assert (!aq);
1432 1.1.1.11 mrg gomp_copy_host2dev (devicep, aq, devptr, &target,
1433 1.1.1.11 mrg sizeof (void *), false, cbufp);
1434 1.1.1.11 mrg ++i;
1435 1.1.1.11 mrg }
1436 1.1.1.2 mrg continue;
1437 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE_INT:
1438 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1439 1.1.1.2 mrg continue;
1440 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1441 1.1.1.8 mrg /* The OpenACC 'host_data' construct only allows 'use_device'
1442 1.1.1.8 mrg "mapping" clauses, so in the first loop, 'not_found_cnt'
1443 1.1.1.8 mrg must always have been zero, so all OpenACC 'use_device'
1444 1.1.1.8 mrg clauses have already been handled. (We can only easily test
1445 1.1.1.8 mrg 'use_device' with 'if_present' clause here.) */
1446 1.1.1.8 mrg assert (tgt->list[i].offset == OFFSET_INLINED);
1447 1.1.1.8 mrg /* Nevertheless, FALLTHRU to the normal handling, to keep the
1448 1.1.1.8 mrg code conceptually simple, similar to the first loop.
*/ 1449 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR: 1450 1.1.1.8 mrg if (tgt->list[i].offset == 0) 1451 1.1.1.8 mrg { 1452 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 1453 1.1.1.8 mrg cur_node.host_end = cur_node.host_start; 1454 1.1.1.8 mrg n = gomp_map_lookup (mem_map, &cur_node); 1455 1.1.1.8 mrg if (n != NULL) 1456 1.1.1.8 mrg { 1457 1.1.1.8 mrg cur_node.host_start -= n->host_start; 1458 1.1.1.8 mrg hostaddrs[i] 1459 1.1.1.8 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset 1460 1.1.1.8 mrg + cur_node.host_start); 1461 1.1.1.8 mrg } 1462 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) 1463 1.1.1.8 mrg { 1464 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 1465 1.1.1.8 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped"); 1466 1.1.1.8 mrg } 1467 1.1.1.8 mrg else if ((kind & typemask) 1468 1.1.1.8 mrg == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 1469 1.1.1.8 mrg /* If not present, continue using the host address. */ 1470 1.1.1.8 mrg ; 1471 1.1.1.8 mrg else 1472 1.1.1.8 mrg __builtin_unreachable (); 1473 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED; 1474 1.1.1.8 mrg } 1475 1.1.1.8 mrg continue; 1476 1.1.1.11 mrg case GOMP_MAP_STRUCT_UNORD: 1477 1.1.1.11 mrg if (sizes[i] > 1) 1478 1.1.1.11 mrg { 1479 1.1.1.11 mrg void *first = hostaddrs[i + 1]; 1480 1.1.1.11 mrg for (size_t j = i + 1; j < i + sizes[i]; j++) 1481 1.1.1.11 mrg if (hostaddrs[j + 1] != first) 1482 1.1.1.11 mrg { 1483 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 1484 1.1.1.11 mrg gomp_fatal ("Mapped array elements must be the " 1485 1.1.1.11 mrg "same (%p vs %p)", first, 1486 1.1.1.11 mrg hostaddrs[j + 1]); 1487 1.1.1.11 mrg } 1488 1.1.1.11 mrg } 1489 1.1.1.11 mrg /* Fallthrough. */ 1490 1.1.1.2 mrg case GOMP_MAP_STRUCT: 1491 1.1.1.2 mrg first = i + 1; 1492 1.1.1.2 mrg last = i + sizes[i]; 1493 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 1494 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last] 1495 1.1.1.2 mrg + sizes[last]; 1496 1.1.1.2 mrg if (tgt->list[first].key != NULL) 1497 1.1.1.2 mrg continue; 1498 1.1.1.11 mrg if (sizes[last] == 0) 1499 1.1.1.11 mrg cur_node.host_end++; 1500 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node); 1501 1.1.1.11 mrg if (sizes[last] == 0) 1502 1.1.1.11 mrg cur_node.host_end--; 1503 1.1.1.11 mrg if (n == NULL && cur_node.host_start == cur_node.host_end) 1504 1.1.1.11 mrg { 1505 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 1506 1.1.1.11 mrg gomp_fatal ("Struct pointer member not mapped (%p)", 1507 1.1.1.11 mrg (void*) hostaddrs[first]); 1508 1.1.1.11 mrg } 1509 1.1.1.2 mrg if (n == NULL) 1510 1.1.1.2 mrg { 1511 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift); 1512 1.1.1.2 mrg tgt_size -= (uintptr_t) hostaddrs[first] 1513 1.1.1.2 mrg - (uintptr_t) hostaddrs[i]; 1514 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1); 1515 1.1.1.2 mrg tgt_size += (uintptr_t) hostaddrs[first] 1516 1.1.1.2 mrg - (uintptr_t) hostaddrs[i]; 1517 1.1.1.2 mrg field_tgt_base = (uintptr_t) hostaddrs[first]; 1518 1.1.1.2 mrg field_tgt_offset = tgt_size; 1519 1.1.1.2 mrg field_tgt_clear = last; 1520 1.1.1.10 mrg field_tgt_structelem_first = NULL; 1521 1.1.1.2 mrg tgt_size += cur_node.host_end 1522 1.1.1.2 mrg - (uintptr_t) hostaddrs[first]; 1523 1.1.1.2 mrg continue; 1524 1.1.1.2 mrg } 1525 1.1.1.2 mrg for (i = first; i <= last; i++) 1526 1.1.1.8 mrg gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, 1527 1.1.1.10 mrg sizes, kinds, cbufp, refcount_set); 1528 1.1.1.2 mrg i--; 1529 1.1.1.2 mrg continue; 1530 1.1.1.2 mrg case 
GOMP_MAP_ALWAYS_POINTER: 1531 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 1532 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizeof (void *); 1533 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node); 1534 1.1.1.2 mrg if (n == NULL 1535 1.1.1.2 mrg || n->host_start > cur_node.host_start 1536 1.1.1.2 mrg || n->host_end < cur_node.host_end) 1537 1.1.1.2 mrg { 1538 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 1539 1.1.1.2 mrg gomp_fatal ("always pointer not mapped"); 1540 1.1.1.2 mrg } 1541 1.1.1.11 mrg if (i > 0 1542 1.1.1.11 mrg && ((get_kind (short_mapkind, kinds, i - 1) & typemask) 1543 1.1.1.11 mrg != GOMP_MAP_ALWAYS_POINTER)) 1544 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); 1545 1.1.1.2 mrg if (cur_node.tgt_offset) 1546 1.1.1.2 mrg cur_node.tgt_offset -= sizes[i]; 1547 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, 1548 1.1.1.3 mrg (void *) (n->tgt->tgt_start 1549 1.1.1.3 mrg + n->tgt_offset 1550 1.1.1.3 mrg + cur_node.host_start 1551 1.1.1.3 mrg - n->host_start), 1552 1.1.1.3 mrg (void *) &cur_node.tgt_offset, 1553 1.1.1.10 mrg sizeof (void *), true, cbufp); 1554 1.1.1.2 mrg cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset 1555 1.1.1.2 mrg + cur_node.host_start - n->host_start; 1556 1.1.1.2 mrg continue; 1557 1.1.1.8 mrg case GOMP_MAP_IF_PRESENT: 1558 1.1.1.8 mrg /* Not present - otherwise handled above. Skip over its 1559 1.1.1.8 mrg MAP_POINTER as well. */ 1560 1.1.1.8 mrg if (i + 1 < mapnum 1561 1.1.1.8 mrg && ((typemask & get_kind (short_mapkind, kinds, i + 1)) 1562 1.1.1.8 mrg == GOMP_MAP_POINTER)) 1563 1.1.1.8 mrg ++i; 1564 1.1.1.8 mrg continue; 1565 1.1.1.8 mrg case GOMP_MAP_ATTACH: 1566 1.1.1.10 mrg case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: 1567 1.1.1.8 mrg { 1568 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 1569 1.1.1.8 mrg cur_node.host_end = cur_node.host_start + sizeof (void *); 1570 1.1.1.8 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); 1571 1.1.1.8 mrg if (n != NULL) 1572 1.1.1.8 mrg { 1573 1.1.1.8 mrg tgt->list[i].key = n; 1574 1.1.1.8 mrg tgt->list[i].offset = cur_node.host_start - n->host_start; 1575 1.1.1.8 mrg tgt->list[i].length = n->host_end - n->host_start; 1576 1.1.1.8 mrg tgt->list[i].copy_from = false; 1577 1.1.1.8 mrg tgt->list[i].always_copy_from = false; 1578 1.1.1.8 mrg tgt->list[i].is_attach = true; 1579 1.1.1.8 mrg /* OpenACC 'attach'/'detach' doesn't affect 1580 1.1.1.8 mrg structured/dynamic reference counts ('n->refcount', 1581 1.1.1.8 mrg 'n->dynamic_refcount'). 
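Attachment state is instead tracked separately, per pointer-sized slot, in 'n->aux->attach_count' (cf. its use in gomp_update further below).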
*/ 1582 1.1.1.10 mrg 1583 1.1.1.10 mrg bool zlas 1584 1.1.1.10 mrg = ((kind & typemask) 1585 1.1.1.10 mrg == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); 1586 1.1.1.10 mrg gomp_attach_pointer (devicep, aq, mem_map, n, 1587 1.1.1.10 mrg (uintptr_t) hostaddrs[i], sizes[i], 1588 1.1.1.10 mrg cbufp, zlas); 1589 1.1.1.8 mrg } 1590 1.1.1.10 mrg else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) 1591 1.1.1.8 mrg { 1592 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock); 1593 1.1.1.8 mrg gomp_fatal ("outer struct not mapped for attach"); 1594 1.1.1.8 mrg } 1595 1.1.1.8 mrg continue; 1596 1.1.1.8 mrg } 1597 1.1.1.2 mrg default: 1598 1.1.1.2 mrg break; 1599 1.1.1.2 mrg } 1600 1.1 mrg splay_tree_key k = &array->key; 1601 1.1 mrg k->host_start = (uintptr_t) hostaddrs[i]; 1602 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask)) 1603 1.1 mrg k->host_end = k->host_start + sizes[i]; 1604 1.1 mrg else 1605 1.1 mrg k->host_end = k->host_start + sizeof (void *); 1606 1.1 mrg splay_tree_key n = splay_tree_lookup (mem_map, k); 1607 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK) 1608 1.1.1.11 mrg { 1609 1.1.1.11 mrg if (field_tgt_clear != FIELD_TGT_EMPTY) 1610 1.1.1.11 mrg { 1611 1.1.1.11 mrg /* For this condition to be true, there must be a 1612 1.1.1.11 mrg duplicate struct element mapping. This can happen with 1613 1.1.1.11 mrg GOMP_MAP_STRUCT_UNORD mappings, for example. */ 1614 1.1.1.11 mrg tgt->list[i].key = n; 1615 1.1.1.11 mrg if (openmp_p) 1616 1.1.1.11 mrg { 1617 1.1.1.11 mrg assert ((n->refcount & REFCOUNT_STRUCTELEM) != 0); 1618 1.1.1.11 mrg assert (field_tgt_structelem_first != NULL); 1619 1.1.1.11 mrg 1620 1.1.1.11 mrg if (i == field_tgt_clear) 1621 1.1.1.11 mrg { 1622 1.1.1.11 mrg n->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST; 1623 1.1.1.11 mrg field_tgt_structelem_first = NULL; 1624 1.1.1.11 mrg } 1625 1.1.1.11 mrg } 1626 1.1.1.11 mrg if (i == field_tgt_clear) 1627 1.1.1.11 mrg field_tgt_clear = FIELD_TGT_EMPTY; 1628 1.1.1.11 mrg gomp_increment_refcount (n, refcount_set); 1629 1.1.1.11 mrg tgt->list[i].copy_from 1630 1.1.1.11 mrg = GOMP_MAP_COPY_FROM_P (kind & typemask); 1631 1.1.1.11 mrg tgt->list[i].always_copy_from 1632 1.1.1.11 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); 1633 1.1.1.11 mrg tgt->list[i].is_attach = false; 1634 1.1.1.11 mrg tgt->list[i].offset = 0; 1635 1.1.1.11 mrg tgt->list[i].length = k->host_end - k->host_start; 1636 1.1.1.11 mrg } 1637 1.1.1.11 mrg else 1638 1.1.1.11 mrg gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], 1639 1.1.1.11 mrg kind & typemask, false, implicit, 1640 1.1.1.11 mrg cbufp, refcount_set); 1641 1.1.1.11 mrg } 1642 1.1 mrg else 1643 1.1 mrg { 1644 1.1.1.8 mrg k->aux = NULL; 1645 1.1.1.2 mrg if (n && n->refcount == REFCOUNT_LINK) 1646 1.1.1.2 mrg { 1647 1.1.1.2 mrg /* Replace target address of the pointer with target address 1648 1.1.1.2 mrg of mapped object in the splay tree. 
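The removed link key N is remembered in 'k->aux->link_key' so that gomp_remove_splay_tree_key can reinsert it into the tree once this mapping is removed.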
*/ 1649 1.1.1.2 mrg splay_tree_remove (mem_map, n); 1650 1.1.1.8 mrg k->aux 1651 1.1.1.8 mrg = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); 1652 1.1.1.8 mrg k->aux->link_key = n; 1653 1.1.1.2 mrg } 1654 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift); 1655 1.1.1.2 mrg tgt->list[i].key = k; 1656 1.1 mrg k->tgt = tgt; 1657 1.1.1.10 mrg k->refcount = 0; 1658 1.1.1.10 mrg k->dynamic_refcount = 0; 1659 1.1.1.8 mrg if (field_tgt_clear != FIELD_TGT_EMPTY) 1660 1.1.1.2 mrg { 1661 1.1.1.2 mrg k->tgt_offset = k->host_start - field_tgt_base 1662 1.1.1.2 mrg + field_tgt_offset; 1663 1.1.1.10 mrg if (openmp_p) 1664 1.1.1.10 mrg { 1665 1.1.1.10 mrg k->refcount = REFCOUNT_STRUCTELEM; 1666 1.1.1.10 mrg if (field_tgt_structelem_first == NULL) 1667 1.1.1.10 mrg { 1668 1.1.1.10 mrg /* Set to first structure element of sequence. */ 1669 1.1.1.10 mrg k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST; 1670 1.1.1.10 mrg field_tgt_structelem_first = k; 1671 1.1.1.10 mrg } 1672 1.1.1.10 mrg else 1673 1.1.1.10 mrg /* Point to refcount of leading element, but do not 1674 1.1.1.10 mrg increment again. */ 1675 1.1.1.10 mrg k->structelem_refcount_ptr 1676 1.1.1.10 mrg = &field_tgt_structelem_first->structelem_refcount; 1677 1.1.1.10 mrg 1678 1.1.1.10 mrg if (i == field_tgt_clear) 1679 1.1.1.10 mrg { 1680 1.1.1.10 mrg k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST; 1681 1.1.1.10 mrg field_tgt_structelem_first = NULL; 1682 1.1.1.10 mrg } 1683 1.1.1.10 mrg } 1684 1.1.1.2 mrg if (i == field_tgt_clear) 1685 1.1.1.8 mrg field_tgt_clear = FIELD_TGT_EMPTY; 1686 1.1.1.2 mrg } 1687 1.1.1.2 mrg else 1688 1.1.1.2 mrg { 1689 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1); 1690 1.1.1.2 mrg k->tgt_offset = tgt_size; 1691 1.1.1.2 mrg tgt_size += k->host_end - k->host_start; 1692 1.1.1.2 mrg } 1693 1.1.1.10 mrg /* First increment, from 0 to 1. gomp_increment_refcount 1694 1.1.1.10 mrg encapsulates the different increment cases, so use this 1695 1.1.1.10 mrg instead of directly setting 1 during initialization. 
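(For struct elements the counter is shared: only the _FIRST element carries 'structelem_refcount', and its siblings point at it via 'structelem_refcount_ptr', as set up above.)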
*/ 1696 1.1.1.10 mrg gomp_increment_refcount (k, refcount_set); 1697 1.1.1.10 mrg 1698 1.1.1.2 mrg tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); 1699 1.1.1.2 mrg tgt->list[i].always_copy_from 1700 1.1.1.2 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); 1701 1.1.1.8 mrg tgt->list[i].is_attach = false; 1702 1.1.1.2 mrg tgt->list[i].offset = 0; 1703 1.1.1.2 mrg tgt->list[i].length = k->host_end - k->host_start; 1704 1.1 mrg tgt->refcount++; 1705 1.1 mrg array->left = NULL; 1706 1.1 mrg array->right = NULL; 1707 1.1 mrg splay_tree_insert (mem_map, array); 1708 1.1 mrg switch (kind & typemask) 1709 1.1 mrg { 1710 1.1 mrg case GOMP_MAP_ALLOC: 1711 1.1 mrg case GOMP_MAP_FROM: 1712 1.1 mrg case GOMP_MAP_FORCE_ALLOC: 1713 1.1 mrg case GOMP_MAP_FORCE_FROM: 1714 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM: 1715 1.1 mrg break; 1716 1.1 mrg case GOMP_MAP_TO: 1717 1.1 mrg case GOMP_MAP_TOFROM: 1718 1.1 mrg case GOMP_MAP_FORCE_TO: 1719 1.1 mrg case GOMP_MAP_FORCE_TOFROM: 1720 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TO: 1721 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TOFROM: 1722 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, 1723 1.1.1.3 mrg (void *) (tgt->tgt_start 1724 1.1.1.3 mrg + k->tgt_offset), 1725 1.1.1.3 mrg (void *) k->host_start, 1726 1.1.1.10 mrg k->host_end - k->host_start, 1727 1.1.1.10 mrg false, cbufp); 1728 1.1 mrg break; 1729 1.1 mrg case GOMP_MAP_POINTER: 1730 1.1.1.10 mrg case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: 1731 1.1.1.10 mrg gomp_map_pointer 1732 1.1.1.10 mrg (tgt, aq, (uintptr_t) *(void **) k->host_start, 1733 1.1.1.10 mrg k->tgt_offset, sizes[i], cbufp, 1734 1.1.1.10 mrg ((kind & typemask) 1735 1.1.1.10 mrg == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION)); 1736 1.1 mrg break; 1737 1.1 mrg case GOMP_MAP_TO_PSET: 1738 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, 1739 1.1.1.3 mrg (void *) (tgt->tgt_start 1740 1.1.1.3 mrg + k->tgt_offset), 1741 1.1.1.3 mrg (void *) k->host_start, 1742 1.1.1.10 mrg k->host_end - k->host_start, 1743 1.1.1.10 mrg false, cbufp); 1744 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc = false; 1745 1.1 mrg 1746 1.1 mrg for (j = i + 1; j < mapnum; j++) 1747 1.1.1.10 mrg { 1748 1.1.1.10 mrg int ptr_kind = (get_kind (short_mapkind, kinds, j) 1749 1.1.1.10 mrg & typemask); 1750 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P (ptr_kind) 1751 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)) 1752 1.1.1.10 mrg break; 1753 1.1.1.10 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start 1754 1.1.1.10 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1755 1.1.1.10 mrg > k->host_end)) 1756 1.1.1.10 mrg break; 1757 1.1.1.10 mrg else 1758 1.1.1.10 mrg { 1759 1.1.1.10 mrg tgt->list[j].key = k; 1760 1.1.1.10 mrg tgt->list[j].copy_from = false; 1761 1.1.1.10 mrg tgt->list[j].always_copy_from = false; 1762 1.1.1.10 mrg tgt->list[j].is_attach = false; 1763 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]); 1764 1.1.1.10 mrg /* For OpenMP, the use of refcount_sets causes 1765 1.1.1.10 mrg errors if we set k->refcount = 1 above but also 1766 1.1.1.10 mrg increment it again here, for decrementing will 1767 1.1.1.10 mrg not properly match, since we decrement only once 1768 1.1.1.10 mrg for each key's refcount. Therefore avoid this 1769 1.1.1.10 mrg increment for OpenMP constructs. 
*/ 1770 1.1.1.10 mrg if (!openmp_p) 1771 1.1.1.10 mrg gomp_increment_refcount (k, refcount_set); 1772 1.1.1.10 mrg gomp_map_pointer (tgt, aq, 1773 1.1.1.10 mrg (uintptr_t) *(void **) hostaddrs[j], 1774 1.1.1.10 mrg k->tgt_offset 1775 1.1.1.10 mrg + ((uintptr_t) hostaddrs[j] 1776 1.1.1.10 mrg - k->host_start), 1777 1.1.1.10 mrg sizes[j], cbufp, false); 1778 1.1.1.10 mrg } 1779 1.1.1.11 mrg } 1780 1.1.1.10 mrg i = j - 1; 1781 1.1 mrg break; 1782 1.1 mrg case GOMP_MAP_FORCE_PRESENT: 1783 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TO: 1784 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_FROM: 1785 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TOFROM: 1786 1.1 mrg { 1787 1.1 mrg /* We already looked up the memory region above and it 1788 1.1 mrg was missing. */ 1789 1.1 mrg size_t size = k->host_end - k->host_start; 1790 1.1 mrg gomp_mutex_unlock (&devicep->lock); 1791 1.1 mrg #ifdef HAVE_INTTYPES_H 1792 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device " 1793 1.1.1.11 mrg "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), " 1794 1.1.1.11 mrg "dev: %d)", (void *) k->host_start, 1795 1.1.1.11 mrg (uint64_t) size, (uint64_t) size, 1796 1.1.1.11 mrg devicep->target_id); 1797 1.1 mrg #else 1798 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device " 1799 1.1.1.11 mrg "(addr: %p, size: %lu (0x%lx), dev: %d)", 1800 1.1.1.11 mrg (void *) k->host_start, 1801 1.1.1.11 mrg (unsigned long) size, (unsigned long) size, 1802 1.1.1.11 mrg devicep->target_id); 1803 1.1 mrg #endif 1804 1.1 mrg } 1805 1.1 mrg break; 1806 1.1 mrg case GOMP_MAP_FORCE_DEVICEPTR: 1807 1.1 mrg assert (k->host_end - k->host_start == sizeof (void *)); 1808 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, 1809 1.1.1.3 mrg (void *) (tgt->tgt_start 1810 1.1.1.3 mrg + k->tgt_offset), 1811 1.1.1.3 mrg (void *) k->host_start, 1812 1.1.1.10 mrg sizeof (void *), false, cbufp); 1813 1.1 mrg break; 1814 1.1 mrg default: 1815 1.1 mrg gomp_mutex_unlock (&devicep->lock); 1816 1.1 mrg gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, 1817 1.1 mrg kind); 1818 1.1 mrg } 1819 1.1.1.2 mrg 1820 1.1.1.8 mrg if (k->aux && k->aux->link_key) 1821 1.1.1.2 mrg { 1822 1.1.1.2 mrg /* Set link pointer on target to the device address of the 1823 1.1.1.2 mrg mapped object. */ 1824 1.1.1.2 mrg void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); 1825 1.1.1.7 mrg /* We intentionally do not use coalescing here, as it's not 1826 1.1.1.7 mrg data allocated by the current call to this function. 
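(CBUF can only batch copies whose destination lies inside the [tgt->tgt_start, tgt->tgt_end) block allocated by this call; N's target memory predates it.)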
*/ 1827 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, 1828 1.1.1.10 mrg &tgt_addr, sizeof (void *), true, NULL); 1829 1.1.1.2 mrg } 1830 1.1 mrg array++; 1831 1.1 mrg } 1832 1.1 mrg } 1833 1.1 mrg } 1834 1.1 mrg 1835 1.1.1.11 mrg if (pragma_kind & GOMP_MAP_VARS_TARGET) 1836 1.1 mrg { 1837 1.1 mrg for (i = 0; i < mapnum; i++) 1838 1.1 mrg { 1839 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); 1840 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, 1841 1.1.1.3 mrg (void *) (tgt->tgt_start + i * sizeof (void *)), 1842 1.1.1.6 mrg (void *) &cur_node.tgt_offset, sizeof (void *), 1843 1.1.1.10 mrg true, cbufp); 1844 1.1 mrg } 1845 1.1 mrg } 1846 1.1 mrg 1847 1.1.1.6 mrg if (cbufp) 1848 1.1.1.6 mrg { 1849 1.1.1.6 mrg long c = 0; 1850 1.1.1.6 mrg for (c = 0; c < cbuf.chunk_cnt; ++c) 1851 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, 1852 1.1.1.7 mrg (void *) (tgt->tgt_start + cbuf.chunks[c].start), 1853 1.1.1.7 mrg (char *) cbuf.buf + (cbuf.chunks[c].start 1854 1.1.1.7 mrg - cbuf.chunks[0].start), 1855 1.1.1.10 mrg cbuf.chunks[c].end - cbuf.chunks[c].start, 1856 1.1.1.11 mrg false, NULL); 1857 1.1.1.11 mrg if (aq) 1858 1.1.1.11 mrg /* Free once the transfer has completed. */ 1859 1.1.1.11 mrg devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf); 1860 1.1.1.11 mrg else 1861 1.1.1.11 mrg free (cbuf.buf); 1862 1.1.1.7 mrg cbuf.buf = NULL; 1863 1.1.1.7 mrg cbufp = NULL; 1864 1.1.1.6 mrg } 1865 1.1.1.6 mrg 1866 1.1.1.2 mrg /* If the variable from "omp target enter data" map-list was already mapped, 1867 1.1.1.2 mrg tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or 1868 1.1.1.2 mrg gomp_exit_data. */ 1869 1.1.1.10 mrg if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0) 1870 1.1.1.2 mrg { 1871 1.1.1.2 mrg free (tgt); 1872 1.1.1.2 mrg tgt = NULL; 1873 1.1.1.2 mrg } 1874 1.1.1.2 mrg 1875 1.1 mrg gomp_mutex_unlock (&devicep->lock); 1876 1.1 mrg return tgt; 1877 1.1 mrg } 1878 1.1 mrg 1879 1.1.1.10 mrg static struct target_mem_desc * 1880 1.1.1.8 mrg gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, 1881 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, 1882 1.1.1.10 mrg bool short_mapkind, htab_t *refcount_set, 1883 1.1.1.10 mrg enum gomp_map_vars_kind pragma_kind) 1884 1.1.1.8 mrg { 1885 1.1.1.10 mrg /* This management of a local refcount_set is for convenience of callers 1886 1.1.1.10 mrg who do not share a refcount_set over multiple map/unmap uses. 
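A caller that does share one set across both phases would look roughly like this (illustrative sketch only, not an actual caller):
  htab_t set = htab_create (mapnum);
  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                     true, &set, GOMP_MAP_VARS_TARGET);
  ... run the target region ...
  gomp_unmap_vars (tgt, true, &set);
  htab_free (set);
This keeps the increments and decrements of each key's refcount paired within one set.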
*/ 1887 1.1.1.10 mrg htab_t local_refcount_set = NULL; 1888 1.1.1.10 mrg if (refcount_set == NULL) 1889 1.1.1.10 mrg { 1890 1.1.1.10 mrg local_refcount_set = htab_create (mapnum); 1891 1.1.1.10 mrg refcount_set = &local_refcount_set; 1892 1.1.1.10 mrg } 1893 1.1.1.10 mrg 1894 1.1.1.10 mrg struct target_mem_desc *tgt; 1895 1.1.1.10 mrg tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, 1896 1.1.1.10 mrg sizes, kinds, short_mapkind, refcount_set, 1897 1.1.1.10 mrg pragma_kind); 1898 1.1.1.10 mrg if (local_refcount_set) 1899 1.1.1.10 mrg htab_free (local_refcount_set); 1900 1.1.1.10 mrg 1901 1.1.1.10 mrg return tgt; 1902 1.1.1.8 mrg } 1903 1.1.1.8 mrg 1904 1.1.1.8 mrg attribute_hidden struct target_mem_desc * 1905 1.1.1.10 mrg goacc_map_vars (struct gomp_device_descr *devicep, 1906 1.1.1.10 mrg struct goacc_asyncqueue *aq, size_t mapnum, 1907 1.1.1.10 mrg void **hostaddrs, void **devaddrs, size_t *sizes, 1908 1.1.1.10 mrg void *kinds, bool short_mapkind, 1909 1.1.1.10 mrg enum gomp_map_vars_kind pragma_kind) 1910 1.1.1.8 mrg { 1911 1.1.1.8 mrg return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs, 1912 1.1.1.10 mrg sizes, kinds, short_mapkind, NULL, 1913 1.1.1.10 mrg GOMP_MAP_VARS_OPENACC | pragma_kind); 1914 1.1.1.8 mrg } 1915 1.1.1.8 mrg 1916 1.1 mrg static void 1917 1.1 mrg gomp_unmap_tgt (struct target_mem_desc *tgt) 1918 1.1 mrg { 1919 1.1 mrg /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ 1920 1.1 mrg if (tgt->tgt_end) 1921 1.1.1.3 mrg gomp_free_device_memory (tgt->device_descr, tgt->to_free); 1922 1.1 mrg 1923 1.1 mrg free (tgt->array); 1924 1.1 mrg free (tgt); 1925 1.1 mrg } 1926 1.1 mrg 1927 1.1.1.8 mrg static bool 1928 1.1.1.8 mrg gomp_unref_tgt (void *ptr) 1929 1.1.1.7 mrg { 1930 1.1.1.7 mrg bool is_tgt_unmapped = false; 1931 1.1.1.8 mrg 1932 1.1.1.8 mrg struct target_mem_desc *tgt = (struct target_mem_desc *) ptr; 1933 1.1.1.8 mrg 1934 1.1.1.8 mrg if (tgt->refcount > 1) 1935 1.1.1.8 mrg tgt->refcount--; 1936 1.1.1.7 mrg else 1937 1.1.1.7 mrg { 1938 1.1.1.8 mrg gomp_unmap_tgt (tgt); 1939 1.1.1.7 mrg is_tgt_unmapped = true; 1940 1.1.1.7 mrg } 1941 1.1.1.8 mrg 1942 1.1.1.8 mrg return is_tgt_unmapped; 1943 1.1.1.8 mrg } 1944 1.1.1.8 mrg 1945 1.1.1.8 mrg static void 1946 1.1.1.8 mrg gomp_unref_tgt_void (void *ptr) 1947 1.1.1.8 mrg { 1948 1.1.1.8 mrg (void) gomp_unref_tgt (ptr); 1949 1.1.1.8 mrg } 1950 1.1.1.8 mrg 1951 1.1.1.10 mrg static void 1952 1.1.1.10 mrg gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k) 1953 1.1.1.8 mrg { 1954 1.1.1.10 mrg splay_tree_remove (sp, k); 1955 1.1.1.8 mrg if (k->aux) 1956 1.1.1.8 mrg { 1957 1.1.1.8 mrg if (k->aux->link_key) 1958 1.1.1.10 mrg splay_tree_insert (sp, (splay_tree_node) k->aux->link_key); 1959 1.1.1.8 mrg if (k->aux->attach_count) 1960 1.1.1.8 mrg free (k->aux->attach_count); 1961 1.1.1.8 mrg free (k->aux); 1962 1.1.1.8 mrg k->aux = NULL; 1963 1.1.1.8 mrg } 1964 1.1.1.10 mrg } 1965 1.1.1.10 mrg 1966 1.1.1.10 mrg static inline __attribute__((always_inline)) bool 1967 1.1.1.10 mrg gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, 1968 1.1.1.10 mrg struct goacc_asyncqueue *aq) 1969 1.1.1.10 mrg { 1970 1.1.1.10 mrg bool is_tgt_unmapped = false; 1971 1.1.1.10 mrg 1972 1.1.1.10 mrg if (REFCOUNT_STRUCTELEM_P (k->refcount)) 1973 1.1.1.10 mrg { 1974 1.1.1.10 mrg if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false) 1975 1.1.1.10 mrg /* Infer the splay_tree_key of the first structelem key using the 1976 1.1.1.10 mrg pointer to the first 
structelem_refcount. */
1977 1.1.1.10 mrg k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1978 1.1.1.10 mrg - offsetof (struct splay_tree_key_s,
1979 1.1.1.10 mrg structelem_refcount));
1980 1.1.1.10 mrg assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1981 1.1.1.10 mrg 
1982 1.1.1.10 mrg /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1983 1.1.1.10 mrg with the splay_tree_keys embedded inside. */
1984 1.1.1.10 mrg splay_tree_node node =
1985 1.1.1.10 mrg (splay_tree_node) ((char *) k
1986 1.1.1.10 mrg - offsetof (struct splay_tree_node_s, key));
1987 1.1.1.10 mrg while (true)
1988 1.1.1.10 mrg {
1989 1.1.1.10 mrg /* Start from the _FIRST key and continue for all following
1990 1.1.1.10 mrg sibling keys. */
1991 1.1.1.10 mrg gomp_remove_splay_tree_key (&devicep->mem_map, k);
1992 1.1.1.10 mrg if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1993 1.1.1.10 mrg break;
1994 1.1.1.10 mrg else
1995 1.1.1.10 mrg k = &(++node)->key;
1996 1.1.1.10 mrg }
1997 1.1.1.10 mrg }
1998 1.1.1.10 mrg else
1999 1.1.1.10 mrg gomp_remove_splay_tree_key (&devicep->mem_map, k);
2000 1.1.1.10 mrg 
2001 1.1.1.8 mrg if (aq)
2002 1.1.1.8 mrg devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2003 1.1.1.8 mrg (void *) k->tgt);
2004 1.1.1.8 mrg else
2005 1.1.1.8 mrg is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2006 1.1.1.7 mrg return is_tgt_unmapped;
2007 1.1.1.7 mrg }
2008 1.1.1.7 mrg 
2009 1.1.1.8 mrg attribute_hidden bool
2010 1.1.1.8 mrg gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2011 1.1.1.8 mrg {
2012 1.1.1.8 mrg return gomp_remove_var_internal (devicep, k, NULL);
2013 1.1.1.8 mrg }
2014 1.1.1.8 mrg 
2015 1.1.1.8 mrg /* Remove a variable asynchronously. This actually removes the variable
2016 1.1.1.8 mrg mapping immediately, but retains the linked target_mem_desc until the
2017 1.1.1.8 mrg asynchronous operation has completed (as it may still refer to target
2018 1.1.1.8 mrg memory). The device lock must be held before entry, and remains locked on
2019 1.1.1.8 mrg exit. */
2020 1.1.1.8 mrg 
2021 1.1.1.8 mrg attribute_hidden void
2022 1.1.1.8 mrg gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2023 1.1.1.8 mrg struct goacc_asyncqueue *aq)
2024 1.1.1.8 mrg {
2025 1.1.1.8 mrg (void) gomp_remove_var_internal (devicep, k, aq);
2026 1.1.1.8 mrg }
2027 1.1.1.8 mrg 
2028 1.1 mrg /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2029 1.1 mrg variables back from device to host; if it is false, it is assumed that this
2030 1.1.1.3 mrg has been done already. */
2031 1.1 mrg 
2032 1.1.1.8 mrg static inline __attribute__((always_inline)) void
2033 1.1.1.8 mrg gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2034 1.1.1.10 mrg htab_t *refcount_set, struct goacc_asyncqueue *aq)
2035 1.1 mrg {
2036 1.1 mrg struct gomp_device_descr *devicep = tgt->device_descr;
2037 1.1 mrg 
2038 1.1 mrg if (tgt->list_count == 0)
2039 1.1 mrg {
2040 1.1 mrg free (tgt);
2041 1.1 mrg return;
2042 1.1 mrg }
2043 1.1 mrg 
2044 1.1 mrg gomp_mutex_lock (&devicep->lock);
2045 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
2046 1.1.1.2 mrg {
2047 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2048 1.1.1.2 mrg free (tgt->array);
2049 1.1.1.2 mrg free (tgt);
2050 1.1.1.2 mrg return;
2051 1.1.1.2 mrg }
2052 1.1 mrg 
2053 1.1 mrg size_t i;
2054 1.1.1.8 mrg 
2055 1.1.1.8 mrg /* We must perform detachments before any copies back to the host.
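(A detach restores the host pointer value in device memory; if the copies ran first, still-attached device addresses could be copied back into host pointer fields.)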
*/ 2056 1.1.1.8 mrg for (i = 0; i < tgt->list_count; i++) 2057 1.1.1.8 mrg { 2058 1.1.1.8 mrg splay_tree_key k = tgt->list[i].key; 2059 1.1.1.8 mrg 2060 1.1.1.8 mrg if (k != NULL && tgt->list[i].is_attach) 2061 1.1.1.8 mrg gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start 2062 1.1.1.8 mrg + tgt->list[i].offset, 2063 1.1.1.8 mrg false, NULL); 2064 1.1.1.8 mrg } 2065 1.1.1.8 mrg 2066 1.1 mrg for (i = 0; i < tgt->list_count; i++) 2067 1.1.1.2 mrg { 2068 1.1.1.2 mrg splay_tree_key k = tgt->list[i].key; 2069 1.1.1.2 mrg if (k == NULL) 2070 1.1.1.2 mrg continue; 2071 1.1.1.2 mrg 2072 1.1.1.8 mrg /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference 2073 1.1.1.8 mrg counts ('n->refcount', 'n->dynamic_refcount'). */ 2074 1.1.1.8 mrg if (tgt->list[i].is_attach) 2075 1.1.1.8 mrg continue; 2076 1.1.1.8 mrg 2077 1.1.1.10 mrg bool do_copy, do_remove; 2078 1.1.1.10 mrg gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove); 2079 1.1.1.2 mrg 2080 1.1.1.10 mrg if ((do_copy && do_copyfrom && tgt->list[i].copy_from) 2081 1.1.1.2 mrg || tgt->list[i].always_copy_from) 2082 1.1.1.8 mrg gomp_copy_dev2host (devicep, aq, 2083 1.1.1.3 mrg (void *) (k->host_start + tgt->list[i].offset), 2084 1.1.1.3 mrg (void *) (k->tgt->tgt_start + k->tgt_offset 2085 1.1.1.3 mrg + tgt->list[i].offset), 2086 1.1.1.3 mrg tgt->list[i].length); 2087 1.1.1.10 mrg if (do_remove) 2088 1.1.1.8 mrg { 2089 1.1.1.8 mrg struct target_mem_desc *k_tgt = k->tgt; 2090 1.1.1.8 mrg bool is_tgt_unmapped = gomp_remove_var (devicep, k); 2091 1.1.1.8 mrg /* It would be bad if TGT got unmapped while we're still iterating 2092 1.1.1.8 mrg over its LIST_COUNT, and also expect to use it in the following 2093 1.1.1.8 mrg code. */ 2094 1.1.1.8 mrg assert (!is_tgt_unmapped 2095 1.1.1.8 mrg || k_tgt != tgt); 2096 1.1.1.8 mrg } 2097 1.1.1.2 mrg } 2098 1.1 mrg 2099 1.1.1.8 mrg if (aq) 2100 1.1.1.8 mrg devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, 2101 1.1.1.8 mrg (void *) tgt); 2102 1.1 mrg else 2103 1.1.1.8 mrg gomp_unref_tgt ((void *) tgt); 2104 1.1 mrg 2105 1.1 mrg gomp_mutex_unlock (&devicep->lock); 2106 1.1 mrg } 2107 1.1 mrg 2108 1.1.1.10 mrg static void 2109 1.1.1.10 mrg gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, 2110 1.1.1.10 mrg htab_t *refcount_set) 2111 1.1.1.8 mrg { 2112 1.1.1.10 mrg /* This management of a local refcount_set is for convenience of callers 2113 1.1.1.10 mrg who do not share a refcount_set over multiple map/unmap uses. 
*/ 2114 1.1.1.10 mrg htab_t local_refcount_set = NULL; 2115 1.1.1.10 mrg if (refcount_set == NULL) 2116 1.1.1.10 mrg { 2117 1.1.1.10 mrg local_refcount_set = htab_create (tgt->list_count); 2118 1.1.1.10 mrg refcount_set = &local_refcount_set; 2119 1.1.1.10 mrg } 2120 1.1.1.10 mrg 2121 1.1.1.10 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL); 2122 1.1.1.10 mrg 2123 1.1.1.10 mrg if (local_refcount_set) 2124 1.1.1.10 mrg htab_free (local_refcount_set); 2125 1.1.1.8 mrg } 2126 1.1.1.8 mrg 2127 1.1.1.8 mrg attribute_hidden void 2128 1.1.1.10 mrg goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, 2129 1.1.1.10 mrg struct goacc_asyncqueue *aq) 2130 1.1.1.8 mrg { 2131 1.1.1.10 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq); 2132 1.1.1.8 mrg } 2133 1.1.1.8 mrg 2134 1.1 mrg static void 2135 1.1 mrg gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, 2136 1.1.1.2 mrg size_t *sizes, void *kinds, bool short_mapkind) 2137 1.1 mrg { 2138 1.1 mrg size_t i; 2139 1.1 mrg struct splay_tree_key_s cur_node; 2140 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7; 2141 1.1 mrg 2142 1.1 mrg if (!devicep) 2143 1.1 mrg return; 2144 1.1 mrg 2145 1.1 mrg if (mapnum == 0) 2146 1.1 mrg return; 2147 1.1 mrg 2148 1.1 mrg gomp_mutex_lock (&devicep->lock); 2149 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED) 2150 1.1.1.2 mrg { 2151 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 2152 1.1.1.2 mrg return; 2153 1.1.1.2 mrg } 2154 1.1.1.2 mrg 2155 1.1 mrg for (i = 0; i < mapnum; i++) 2156 1.1 mrg if (sizes[i]) 2157 1.1 mrg { 2158 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 2159 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i]; 2160 1.1 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); 2161 1.1 mrg if (n) 2162 1.1 mrg { 2163 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i); 2164 1.1 mrg if (n->host_start > cur_node.host_start 2165 1.1 mrg || n->host_end < cur_node.host_end) 2166 1.1 mrg { 2167 1.1 mrg gomp_mutex_unlock (&devicep->lock); 2168 1.1 mrg gomp_fatal ("Trying to update [%p..%p) object when " 2169 1.1 mrg "only [%p..%p) is mapped", 2170 1.1 mrg (void *) cur_node.host_start, 2171 1.1 mrg (void *) cur_node.host_end, 2172 1.1 mrg (void *) n->host_start, 2173 1.1 mrg (void *) n->host_end); 2174 1.1 mrg } 2175 1.1.1.3 mrg 2176 1.1.1.10 mrg if (n->aux && n->aux->attach_count) 2177 1.1.1.10 mrg { 2178 1.1.1.10 mrg uintptr_t addr = cur_node.host_start; 2179 1.1.1.10 mrg while (addr < cur_node.host_end) 2180 1.1.1.10 mrg { 2181 1.1.1.10 mrg /* We have to be careful not to overwrite still attached 2182 1.1.1.10 mrg pointers during host<->device updates. 
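Hence the loop below walks the region one pointer-sized slot at a time and only copies slots whose 'attach_count' is zero.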
*/ 2183 1.1.1.10 mrg size_t i = (addr - cur_node.host_start) / sizeof (void *); 2184 1.1.1.10 mrg if (n->aux->attach_count[i] == 0) 2185 1.1.1.10 mrg { 2186 1.1.1.10 mrg void *devaddr = (void *) (n->tgt->tgt_start 2187 1.1.1.10 mrg + n->tgt_offset 2188 1.1.1.10 mrg + addr - n->host_start); 2189 1.1.1.10 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask)) 2190 1.1.1.10 mrg gomp_copy_host2dev (devicep, NULL, 2191 1.1.1.10 mrg devaddr, (void *) addr, 2192 1.1.1.10 mrg sizeof (void *), false, NULL); 2193 1.1.1.10 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask)) 2194 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, 2195 1.1.1.10 mrg (void *) addr, devaddr, 2196 1.1.1.10 mrg sizeof (void *)); 2197 1.1.1.10 mrg } 2198 1.1.1.10 mrg addr += sizeof (void *); 2199 1.1.1.10 mrg } 2200 1.1.1.10 mrg } 2201 1.1.1.10 mrg else 2202 1.1.1.10 mrg { 2203 1.1.1.10 mrg void *hostaddr = (void *) cur_node.host_start; 2204 1.1.1.10 mrg void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset 2205 1.1.1.10 mrg + cur_node.host_start 2206 1.1.1.10 mrg - n->host_start); 2207 1.1.1.10 mrg size_t size = cur_node.host_end - cur_node.host_start; 2208 1.1.1.3 mrg 2209 1.1.1.10 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask)) 2210 1.1.1.10 mrg gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size, 2211 1.1.1.10 mrg false, NULL); 2212 1.1.1.10 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask)) 2213 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); 2214 1.1.1.10 mrg } 2215 1.1 mrg } 2216 1.1.1.11 mrg else 2217 1.1.1.11 mrg { 2218 1.1.1.11 mrg int kind = get_kind (short_mapkind, kinds, i); 2219 1.1.1.11 mrg 2220 1.1.1.11 mrg if (GOMP_MAP_PRESENT_P (kind)) 2221 1.1.1.11 mrg { 2222 1.1.1.11 mrg /* We already looked up the memory region above and it 2223 1.1.1.11 mrg was missing. */ 2224 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 2225 1.1.1.11 mrg #ifdef HAVE_INTTYPES_H 2226 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device " 2227 1.1.1.11 mrg "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), " 2228 1.1.1.11 mrg "dev: %d)", (void *) hostaddrs[i], 2229 1.1.1.11 mrg (uint64_t) sizes[i], (uint64_t) sizes[i], 2230 1.1.1.11 mrg devicep->target_id); 2231 1.1.1.11 mrg #else 2232 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device " 2233 1.1.1.11 mrg "(addr: %p, size: %lu (0x%lx), dev: %d)", 2234 1.1.1.11 mrg (void *) hostaddrs[i], (unsigned long) sizes[i], 2235 1.1.1.11 mrg (unsigned long) sizes[i], devicep->target_id); 2236 1.1.1.11 mrg #endif 2237 1.1.1.11 mrg } 2238 1.1.1.11 mrg } 2239 1.1 mrg } 2240 1.1 mrg gomp_mutex_unlock (&devicep->lock); 2241 1.1 mrg } 2242 1.1 mrg 2243 1.1.1.11 mrg static struct gomp_offload_icv_list * 2244 1.1.1.11 mrg gomp_get_offload_icv_item (int dev_num) 2245 1.1.1.11 mrg { 2246 1.1.1.11 mrg struct gomp_offload_icv_list *l = gomp_offload_icv_list; 2247 1.1.1.11 mrg while (l != NULL && l->device_num != dev_num) 2248 1.1.1.11 mrg l = l->next; 2249 1.1.1.11 mrg 2250 1.1.1.11 mrg return l; 2251 1.1.1.11 mrg } 2252 1.1.1.11 mrg 2253 1.1.1.11 mrg /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values 2254 1.1.1.11 mrg depending on the device num and the variable hierarchy 2255 1.1.1.11 mrg (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given 2256 1.1.1.11 mrg device and thus no item with that device number is contained in 2257 1.1.1.11 mrg gomp_offload_icv_list, then a new item is created and added to the list. 
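For example, for the 'nteams' ICV on device 42, a _DEV_42 setting takes precedence over _DEV, which takes precedence over _ALL; if none is configured, gomp_default_icv_values supplies the fallback.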
*/
2258 1.1.1.11 mrg 
2259 1.1.1.11 mrg static struct gomp_offload_icvs *
2260 1.1.1.11 mrg get_gomp_offload_icvs (int dev_num)
2261 1.1.1.11 mrg {
2262 1.1.1.11 mrg struct gomp_icv_list *dev
2263 1.1.1.11 mrg = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2264 1.1.1.11 mrg struct gomp_icv_list *all
2265 1.1.1.11 mrg = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2266 1.1.1.11 mrg struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2267 1.1.1.11 mrg struct gomp_offload_icv_list *offload_icvs
2268 1.1.1.11 mrg = gomp_get_offload_icv_item (dev_num);
2269 1.1.1.11 mrg 
2270 1.1.1.11 mrg if (offload_icvs != NULL)
2271 1.1.1.11 mrg return &offload_icvs->icvs;
2272 1.1.1.11 mrg 
2273 1.1.1.11 mrg struct gomp_offload_icv_list *new
2274 1.1.1.11 mrg = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2275 1.1.1.11 mrg 
2276 1.1.1.11 mrg new->device_num = dev_num;
2277 1.1.1.11 mrg new->icvs.device_num = dev_num;
2278 1.1.1.11 mrg new->next = gomp_offload_icv_list;
2279 1.1.1.11 mrg 
2280 1.1.1.11 mrg if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2281 1.1.1.11 mrg new->icvs.nteams = dev_x->icvs.nteams_var;
2282 1.1.1.11 mrg else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2283 1.1.1.11 mrg new->icvs.nteams = dev->icvs.nteams_var;
2284 1.1.1.11 mrg else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2285 1.1.1.11 mrg new->icvs.nteams = all->icvs.nteams_var;
2286 1.1.1.11 mrg else
2287 1.1.1.11 mrg new->icvs.nteams = gomp_default_icv_values.nteams_var;
2288 1.1.1.11 mrg 
2289 1.1.1.11 mrg if (dev_x != NULL
2290 1.1.1.11 mrg && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2291 1.1.1.11 mrg new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2292 1.1.1.11 mrg else if (dev != NULL
2293 1.1.1.11 mrg && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2294 1.1.1.11 mrg new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2295 1.1.1.11 mrg else if (all != NULL
2296 1.1.1.11 mrg && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2297 1.1.1.11 mrg new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2298 1.1.1.11 mrg else
2299 1.1.1.11 mrg new->icvs.teams_thread_limit
2300 1.1.1.11 mrg = gomp_default_icv_values.teams_thread_limit_var;
2301 1.1.1.11 mrg 
2302 1.1.1.11 mrg if (dev_x != NULL
2303 1.1.1.11 mrg && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2304 1.1.1.11 mrg new->icvs.default_device = dev_x->icvs.default_device_var;
2305 1.1.1.11 mrg else if (dev != NULL
2306 1.1.1.11 mrg && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2307 1.1.1.11 mrg new->icvs.default_device = dev->icvs.default_device_var;
2308 1.1.1.11 mrg else if (all != NULL
2309 1.1.1.11 mrg && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2310 1.1.1.11 mrg new->icvs.default_device = all->icvs.default_device_var;
2311 1.1.1.11 mrg else
2312 1.1.1.11 mrg new->icvs.default_device = gomp_default_icv_values.default_device_var;
2313 1.1.1.11 mrg 
2314 1.1.1.11 mrg gomp_offload_icv_list = new;
2315 1.1.1.11 mrg return &new->icvs;
2316 1.1.1.11 mrg }
2317 1.1.1.11 mrg 
2318 1.1 mrg /* Load the image pointed to by TARGET_DATA to the device specified by
2319 1.1 mrg DEVICEP, and insert into the splay tree the mapping between addresses
2320 1.1.1.2 mrg from HOST_TABLE and from the loaded target image. We rely on the host
2321 1.1.1.2 mrg and device compilers emitting variables and functions in the same order.
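(Matching is purely positional: entry I of the host table corresponds to entry I of the target table; no lookup by name is performed.)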
*/
2322 1.1 mrg 
2323 1.1 mrg static void
2324 1.1.1.2 mrg gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2325 1.1.1.2 mrg const void *host_table, const void *target_data,
2326 1.1.1.2 mrg bool is_register_lock)
2327 1.1 mrg {
2328 1.1 mrg void **host_func_table = ((void ***) host_table)[0];
2329 1.1 mrg void **host_funcs_end = ((void ***) host_table)[1];
2330 1.1 mrg void **host_var_table = ((void ***) host_table)[2];
2331 1.1 mrg void **host_vars_end = ((void ***) host_table)[3];
2332 1.1.1.11 mrg void **host_ind_func_table = NULL;
2333 1.1.1.11 mrg void **host_ind_funcs_end = NULL;
2334 1.1 mrg 
2335 1.1.1.11 mrg if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2336 1.1.1.11 mrg {
2337 1.1.1.11 mrg host_ind_func_table = ((void ***) host_table)[4];
2338 1.1.1.11 mrg host_ind_funcs_end = ((void ***) host_table)[5];
2339 1.1.1.11 mrg }
2340 1.1.1.11 mrg 
2341 1.1.1.11 mrg /* The func and ind_func tables contain only addresses, the var table
2342 1.1.1.11 mrg contains addresses and corresponding sizes. */
2343 1.1 mrg int num_funcs = host_funcs_end - host_func_table;
2344 1.1 mrg int num_vars = (host_vars_end - host_var_table) / 2;
2345 1.1.1.11 mrg int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
2346 1.1.1.10 mrg 
2347 1.1 mrg /* Load image to device and get target addresses for the image. */
2348 1.1 mrg struct addr_pair *target_table = NULL;
2349 1.1.1.11 mrg uint64_t *rev_target_fn_table = NULL;
2350 1.1.1.2 mrg int i, num_target_entries;
2351 1.1.1.2 mrg 
2352 1.1.1.11 mrg /* With reverse offload, insert also target-host addresses. */
2353 1.1.1.11 mrg bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2354 1.1.1.11 mrg 
2355 1.1.1.2 mrg num_target_entries
2356 1.1.1.2 mrg = devicep->load_image_func (devicep->target_id, version,
2357 1.1.1.11 mrg target_data, &target_table,
2358 1.1.1.11 mrg rev_lookup ? &rev_target_fn_table : NULL,
2359 1.1.1.11 mrg num_ind_funcs
2360 1.1.1.11 mrg ? (uint64_t *) host_ind_func_table : NULL);
2361 1.1 mrg 
2362 1.1.1.10 mrg if (num_target_entries != num_funcs + num_vars
2363 1.1.1.11 mrg /* "+1" due to the additional ICV struct. */
2364 1.1.1.11 mrg && num_target_entries != num_funcs + num_vars + 1)
2365 1.1 mrg {
2366 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2367 1.1 mrg if (is_register_lock)
2368 1.1 mrg gomp_mutex_unlock (&register_lock);
2369 1.1.1.2 mrg gomp_fatal ("Cannot map target functions or variables"
2370 1.1.1.2 mrg " (expected %u, have %u)", num_funcs + num_vars,
2371 1.1.1.2 mrg num_target_entries);
2372 1.1 mrg }
2373 1.1 mrg 
2374 1.1 mrg /* Insert host-target address mapping into splay tree. */
2375 1.1 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2376 1.1.1.11 mrg /* "+1" due to the additional ICV struct.
*/
2377 1.1.1.11 mrg tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2378 1.1.1.11 mrg * sizeof (*tgt->array));
2379 1.1.1.11 mrg if (rev_target_fn_table)
2380 1.1.1.11 mrg tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2381 1.1.1.11 mrg else
2382 1.1.1.11 mrg tgt->rev_array = NULL;
2383 1.1.1.2 mrg tgt->refcount = REFCOUNT_INFINITY;
2384 1.1 mrg tgt->tgt_start = 0;
2385 1.1 mrg tgt->tgt_end = 0;
2386 1.1 mrg tgt->to_free = NULL;
2387 1.1 mrg tgt->prev = NULL;
2388 1.1 mrg tgt->list_count = 0;
2389 1.1 mrg tgt->device_descr = devicep;
2390 1.1 mrg splay_tree_node array = tgt->array;
2391 1.1.1.11 mrg reverse_splay_tree_node rev_array = tgt->rev_array;
2392 1.1 mrg 
2393 1.1 mrg for (i = 0; i < num_funcs; i++)
2394 1.1 mrg {
2395 1.1 mrg splay_tree_key k = &array->key;
2396 1.1 mrg k->host_start = (uintptr_t) host_func_table[i];
2397 1.1 mrg k->host_end = k->host_start + 1;
2398 1.1 mrg k->tgt = tgt;
2399 1.1 mrg k->tgt_offset = target_table[i].start;
2400 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
2401 1.1.1.7 mrg k->dynamic_refcount = 0;
2402 1.1.1.8 mrg k->aux = NULL;
2403 1.1 mrg array->left = NULL;
2404 1.1 mrg array->right = NULL;
2405 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
2406 1.1.1.11 mrg if (rev_target_fn_table)
2407 1.1.1.11 mrg {
2408 1.1.1.11 mrg reverse_splay_tree_key k2 = &rev_array->key;
2409 1.1.1.11 mrg k2->dev = rev_target_fn_table[i];
2410 1.1.1.11 mrg k2->k = k;
2411 1.1.1.11 mrg rev_array->left = NULL;
2412 1.1.1.11 mrg rev_array->right = NULL;
2413 1.1.1.11 mrg if (k2->dev != 0)
2414 1.1.1.11 mrg reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2415 1.1.1.11 mrg rev_array++;
2416 1.1.1.11 mrg }
2417 1.1 mrg array++;
2418 1.1 mrg }
2419 1.1 mrg 
2420 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
2421 1.1.1.2 mrg "omp declare target link" variables. */
2422 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2423 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
2424 1.1.1.2 mrg 
2425 1.1 mrg for (i = 0; i < num_vars; i++)
2426 1.1 mrg {
2427 1.1 mrg struct addr_pair *target_var = &target_table[num_funcs + i];
2428 1.1.1.2 mrg uintptr_t target_size = target_var->end - target_var->start;
2429 1.1.1.8 mrg bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2430 1.1.1.2 mrg 
2431 1.1.1.8 mrg if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2432 1.1 mrg {
2433 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2434 1.1 mrg if (is_register_lock)
2435 1.1 mrg gomp_mutex_unlock (&register_lock);
2436 1.1.1.2 mrg gomp_fatal ("Cannot map target variables (size mismatch)");
2437 1.1 mrg }
2438 1.1 mrg 
2439 1.1 mrg splay_tree_key k = &array->key;
2440 1.1 mrg k->host_start = (uintptr_t) host_var_table[i * 2];
2441 1.1.1.2 mrg k->host_end
2442 1.1.1.2 mrg = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2443 1.1 mrg k->tgt = tgt;
2444 1.1 mrg k->tgt_offset = target_var->start;
2445 1.1.1.8 mrg k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2446 1.1.1.7 mrg k->dynamic_refcount = 0;
2447 1.1.1.8 mrg k->aux = NULL;
2448 1.1 mrg array->left = NULL;
2449 1.1 mrg array->right = NULL;
2450 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
2451 1.1 mrg array++;
2452 1.1 mrg }
2453 1.1 mrg 
2454 1.1.1.11 mrg /* Last entry is for the ICVs variable.
2455 1.1.1.11 mrg Tolerate the case where the plugin does not return those entries.
*/
2456 1.1.1.10 mrg if (num_funcs + num_vars < num_target_entries)
2457 1.1.1.10 mrg {
2458 1.1.1.11 mrg struct addr_pair *var = &target_table[num_funcs + num_vars];
2459 1.1.1.11 mrg 
2460 1.1.1.11 mrg /* Start address will be non-zero for the ICVs variable if
2461 1.1.1.11 mrg the variable was found in this image. */
2462 1.1.1.11 mrg if (var->start != 0)
2463 1.1.1.10 mrg {
2464 1.1.1.10 mrg /* The index of the devicep within devices[] is regarded as its
2465 1.1.1.10 mrg 'device number', which is different from the per-device type
2466 1.1.1.10 mrg devicep->target_id. */
2467 1.1.1.11 mrg int dev_num = (int) (devicep - &devices[0]);
2468 1.1.1.11 mrg struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2469 1.1.1.11 mrg size_t var_size = var->end - var->start;
2470 1.1.1.11 mrg if (var_size != sizeof (struct gomp_offload_icvs))
2471 1.1.1.10 mrg {
2472 1.1.1.10 mrg gomp_mutex_unlock (&devicep->lock);
2473 1.1.1.10 mrg if (is_register_lock)
2474 1.1.1.10 mrg gomp_mutex_unlock (&register_lock);
2475 1.1.1.11 mrg gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2476 1.1.1.10 mrg "format");
2477 1.1.1.10 mrg }
2478 1.1.1.11 mrg /* Copy the ICVs variable into its place in device memory, thereby
2479 1.1.1.11 mrg putting its device number into effect. */
2480 1.1.1.11 mrg gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2481 1.1.1.11 mrg var_size, false, NULL);
2482 1.1.1.11 mrg splay_tree_key k = &array->key;
2483 1.1.1.11 mrg k->host_start = (uintptr_t) icvs;
2484 1.1.1.11 mrg k->host_end =
2485 1.1.1.11 mrg k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2486 1.1.1.11 mrg k->tgt = tgt;
2487 1.1.1.11 mrg k->tgt_offset = var->start;
2488 1.1.1.11 mrg k->refcount = REFCOUNT_INFINITY;
2489 1.1.1.11 mrg k->dynamic_refcount = 0;
2490 1.1.1.11 mrg k->aux = NULL;
2491 1.1.1.11 mrg array->left = NULL;
2492 1.1.1.11 mrg array->right = NULL;
2493 1.1.1.11 mrg splay_tree_insert (&devicep->mem_map, array);
2494 1.1.1.11 mrg array++;
2495 1.1.1.10 mrg }
2496 1.1.1.10 mrg }
2497 1.1.1.10 mrg 
2498 1.1 mrg free (target_table);
2499 1.1 mrg }
2500 1.1 mrg 
2501 1.1.1.2 mrg /* Unload the mappings described by target_data from device DEVICE_P.
2502 1.1.1.2 mrg The device must be locked. */
2503 1.1.1.2 mrg 
2504 1.1.1.2 mrg static void
2505 1.1.1.2 mrg gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2506 1.1.1.2 mrg unsigned version,
2507 1.1.1.2 mrg const void *host_table, const void *target_data)
2508 1.1.1.2 mrg {
2509 1.1.1.2 mrg void **host_func_table = ((void ***) host_table)[0];
2510 1.1.1.2 mrg void **host_funcs_end = ((void ***) host_table)[1];
2511 1.1.1.2 mrg void **host_var_table = ((void ***) host_table)[2];
2512 1.1.1.2 mrg void **host_vars_end = ((void ***) host_table)[3];
2513 1.1.1.2 mrg 
2514 1.1.1.2 mrg /* The func table contains only addresses, the var table contains addresses
2515 1.1.1.2 mrg and corresponding sizes. */
2516 1.1.1.2 mrg int num_funcs = host_funcs_end - host_func_table;
2517 1.1.1.2 mrg int num_vars = (host_vars_end - host_var_table) / 2;
2518 1.1.1.2 mrg 
2519 1.1.1.2 mrg struct splay_tree_key_s k;
2520 1.1.1.2 mrg splay_tree_key node = NULL;
2521 1.1.1.2 mrg 
2522 1.1.1.2 mrg /* Find mapping at start of node array. */
2523 1.1.1.2 mrg if (num_funcs || num_vars)
2524 1.1.1.2 mrg {
2525 1.1.1.2 mrg k.host_start = (num_funcs ?
(uintptr_t) host_func_table[0] 2526 1.1.1.2 mrg : (uintptr_t) host_var_table[0]); 2527 1.1.1.2 mrg k.host_end = k.host_start + 1; 2528 1.1.1.2 mrg node = splay_tree_lookup (&devicep->mem_map, &k); 2529 1.1.1.2 mrg } 2530 1.1.1.2 mrg 2531 1.1.1.3 mrg if (!devicep->unload_image_func (devicep->target_id, version, target_data)) 2532 1.1.1.3 mrg { 2533 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock); 2534 1.1.1.3 mrg gomp_fatal ("image unload fail"); 2535 1.1.1.3 mrg } 2536 1.1.1.11 mrg if (devicep->mem_map_rev.root) 2537 1.1.1.11 mrg { 2538 1.1.1.11 mrg /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only 2539 1.1.1.11 mrg real allocation. */ 2540 1.1.1.11 mrg assert (node && node->tgt && node->tgt->rev_array); 2541 1.1.1.11 mrg assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt); 2542 1.1.1.11 mrg free (node->tgt->rev_array); 2543 1.1.1.11 mrg devicep->mem_map_rev.root = NULL; 2544 1.1.1.11 mrg } 2545 1.1.1.2 mrg 2546 1.1.1.2 mrg /* Remove mappings from splay tree. */ 2547 1.1.1.2 mrg int i; 2548 1.1.1.2 mrg for (i = 0; i < num_funcs; i++) 2549 1.1.1.2 mrg { 2550 1.1.1.2 mrg k.host_start = (uintptr_t) host_func_table[i]; 2551 1.1.1.2 mrg k.host_end = k.host_start + 1; 2552 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k); 2553 1.1.1.2 mrg } 2554 1.1.1.2 mrg 2555 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks 2556 1.1.1.2 mrg "omp declare target link" variables. */ 2557 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); 2558 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit; 2559 1.1.1.2 mrg bool is_tgt_unmapped = false; 2560 1.1.1.2 mrg 2561 1.1.1.2 mrg for (i = 0; i < num_vars; i++) 2562 1.1.1.2 mrg { 2563 1.1.1.2 mrg k.host_start = (uintptr_t) host_var_table[i * 2]; 2564 1.1.1.2 mrg k.host_end 2565 1.1.1.2 mrg = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); 2566 1.1.1.2 mrg 2567 1.1.1.2 mrg if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1])) 2568 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k); 2569 1.1.1.2 mrg else 2570 1.1.1.2 mrg { 2571 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); 2572 1.1.1.7 mrg is_tgt_unmapped = gomp_remove_var (devicep, n); 2573 1.1.1.2 mrg } 2574 1.1.1.2 mrg } 2575 1.1.1.2 mrg 2576 1.1.1.2 mrg if (node && !is_tgt_unmapped) 2577 1.1.1.2 mrg { 2578 1.1.1.2 mrg free (node->tgt); 2579 1.1.1.2 mrg free (node); 2580 1.1.1.2 mrg } 2581 1.1.1.2 mrg } 2582 1.1.1.2 mrg 2583 1.1.1.11 mrg static void 2584 1.1.1.11 mrg gomp_requires_to_name (char *buf, size_t size, int requires_mask) 2585 1.1.1.11 mrg { 2586 1.1.1.11 mrg char *end = buf + size, *p = buf; 2587 1.1.1.11 mrg if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS) 2588 1.1.1.11 mrg p += snprintf (p, end - p, "unified_address"); 2589 1.1.1.11 mrg if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY) 2590 1.1.1.11 mrg p += snprintf (p, end - p, "%sunified_shared_memory", 2591 1.1.1.11 mrg (p == buf ? "" : ", ")); 2592 1.1.1.11 mrg if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD) 2593 1.1.1.11 mrg p += snprintf (p, end - p, "%sreverse_offload", 2594 1.1.1.11 mrg (p == buf ? "" : ", ")); 2595 1.1.1.11 mrg } 2596 1.1.1.11 mrg 2597 1.1 mrg /* This function should be called from every offload image while loading. 2598 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of 2599 1.1.1.11 mrg the target, and DATA. 
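For GOMP_VERSION_LIB (version) > 1, DATA consists of the 'omp requires' mask word followed by the target descriptor proper; older images pass the descriptor directly (see the decoding below).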
*/
2600 1.1 mrg 
2601 1.1 mrg void
2602 1.1.1.2 mrg GOMP_offload_register_ver (unsigned version, const void *host_table,
2603 1.1.1.11 mrg int target_type, const void *data)
2604 1.1 mrg {
2605 1.1 mrg int i;
2606 1.1.1.2 mrg 
2607 1.1.1.2 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2608 1.1.1.2 mrg gomp_fatal ("Library too old for offload (version %u < %u)",
2609 1.1.1.2 mrg GOMP_VERSION, GOMP_VERSION_LIB (version));
2610 1.1.1.11 mrg 
2611 1.1.1.11 mrg int omp_req;
2612 1.1.1.11 mrg const void *target_data;
2613 1.1.1.11 mrg if (GOMP_VERSION_LIB (version) > 1)
2614 1.1.1.11 mrg {
2615 1.1.1.11 mrg omp_req = (int) (size_t) ((void **) data)[0];
2616 1.1.1.11 mrg target_data = &((void **) data)[1];
2617 1.1.1.11 mrg }
2618 1.1.1.11 mrg else
2619 1.1.1.11 mrg {
2620 1.1.1.11 mrg omp_req = 0;
2621 1.1.1.11 mrg target_data = data;
2622 1.1.1.11 mrg }
2623 1.1.1.11 mrg 
2624 1.1 mrg gomp_mutex_lock (&register_lock);
2625 1.1 mrg 
2626 1.1.1.11 mrg if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2627 1.1.1.11 mrg {
2628 1.1.1.11 mrg char buf1[sizeof ("unified_address, unified_shared_memory, "
2629 1.1.1.11 mrg "reverse_offload")];
2630 1.1.1.11 mrg char buf2[sizeof ("unified_address, unified_shared_memory, "
2631 1.1.1.11 mrg "reverse_offload")];
2632 1.1.1.11 mrg gomp_requires_to_name (buf2, sizeof (buf2),
2633 1.1.1.11 mrg omp_req != GOMP_REQUIRES_TARGET_USED
2634 1.1.1.11 mrg ? omp_req : omp_requires_mask);
2635 1.1.1.11 mrg if (omp_req != GOMP_REQUIRES_TARGET_USED
2636 1.1.1.11 mrg && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2637 1.1.1.11 mrg {
2638 1.1.1.11 mrg gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2639 1.1.1.11 mrg gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2640 1.1.1.11 mrg "in multiple compilation units: '%s' vs. '%s'",
2641 1.1.1.11 mrg buf1, buf2);
2642 1.1.1.11 mrg }
2643 1.1.1.11 mrg else
2644 1.1.1.11 mrg gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2645 1.1.1.11 mrg "some compilation units", buf2);
2646 1.1.1.11 mrg }
2647 1.1.1.11 mrg omp_requires_mask = omp_req;
2648 1.1.1.11 mrg 
2649 1.1 mrg /* Load image to all initialized devices. */
2650 1.1 mrg for (i = 0; i < num_devices; i++)
2651 1.1 mrg {
2652 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
2653 1.1 mrg gomp_mutex_lock (&devicep->lock);
2654 1.1.1.2 mrg if (devicep->type == target_type
2655 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
2656 1.1.1.2 mrg gomp_load_image_to_device (devicep, version,
2657 1.1.1.2 mrg host_table, target_data, true);
2658 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2659 1.1 mrg }
2660 1.1 mrg 
2661 1.1 mrg /* Insert image to array of pending images. */
2662 1.1 mrg offload_images
2663 1.1 mrg = gomp_realloc_unlock (offload_images,
2664 1.1 mrg (num_offload_images + 1)
2665 1.1 mrg * sizeof (struct offload_image_descr));
2666 1.1.1.2 mrg offload_images[num_offload_images].version = version;
2667 1.1 mrg offload_images[num_offload_images].type = target_type;
2668 1.1 mrg offload_images[num_offload_images].host_table = host_table;
2669 1.1 mrg offload_images[num_offload_images].target_data = target_data;
2670 1.1 mrg 
2671 1.1 mrg num_offload_images++;
2672 1.1 mrg gomp_mutex_unlock (&register_lock);
2673 1.1 mrg }
2674 1.1 mrg 
2675 1.1.1.11 mrg /* Legacy entry point.
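Kept for objects compiled against older libgomp; it merely forwards with version 0.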
*/ 2676 1.1.1.11 mrg 2677 1.1.1.2 mrg void 2678 1.1.1.2 mrg GOMP_offload_register (const void *host_table, int target_type, 2679 1.1.1.2 mrg const void *target_data) 2680 1.1.1.2 mrg { 2681 1.1.1.2 mrg GOMP_offload_register_ver (0, host_table, target_type, target_data); 2682 1.1.1.2 mrg } 2683 1.1.1.2 mrg 2684 1.1 mrg /* This function should be called from every offload image while unloading. 2685 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of 2686 1.1.1.11 mrg the target, and DATA. */ 2687 1.1 mrg 2688 1.1 mrg void 2689 1.1.1.2 mrg GOMP_offload_unregister_ver (unsigned version, const void *host_table, 2690 1.1.1.11 mrg int target_type, const void *data) 2691 1.1 mrg { 2692 1.1 mrg int i; 2693 1.1 mrg 2694 1.1.1.11 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION) 2695 1.1.1.11 mrg gomp_fatal ("Library too old for offload (version %u < %u)", 2696 1.1.1.11 mrg GOMP_VERSION, GOMP_VERSION_LIB (version)); 2697 1.1.1.11 mrg 2698 1.1.1.11 mrg const void *target_data; 2699 1.1.1.11 mrg if (GOMP_VERSION_LIB (version) > 1) 2700 1.1.1.11 mrg target_data = &((void **) data)[1]; 2701 1.1.1.11 mrg else 2702 1.1.1.11 mrg target_data = data; 2703 1.1.1.11 mrg 2704 1.1 mrg gomp_mutex_lock (®ister_lock); 2705 1.1 mrg 2706 1.1 mrg /* Unload image from all initialized devices. */ 2707 1.1 mrg for (i = 0; i < num_devices; i++) 2708 1.1 mrg { 2709 1.1 mrg struct gomp_device_descr *devicep = &devices[i]; 2710 1.1 mrg gomp_mutex_lock (&devicep->lock); 2711 1.1.1.2 mrg if (devicep->type == target_type 2712 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED) 2713 1.1.1.2 mrg gomp_unload_image_from_device (devicep, version, 2714 1.1.1.2 mrg host_table, target_data); 2715 1.1 mrg gomp_mutex_unlock (&devicep->lock); 2716 1.1 mrg } 2717 1.1 mrg 2718 1.1 mrg /* Remove image from array of pending images. */ 2719 1.1 mrg for (i = 0; i < num_offload_images; i++) 2720 1.1 mrg if (offload_images[i].target_data == target_data) 2721 1.1 mrg { 2722 1.1 mrg offload_images[i] = offload_images[--num_offload_images]; 2723 1.1 mrg break; 2724 1.1 mrg } 2725 1.1 mrg 2726 1.1 mrg gomp_mutex_unlock (®ister_lock); 2727 1.1 mrg } 2728 1.1 mrg 2729 1.1.1.11 mrg /* Legacy entry point. */ 2730 1.1.1.11 mrg 2731 1.1.1.2 mrg void 2732 1.1.1.2 mrg GOMP_offload_unregister (const void *host_table, int target_type, 2733 1.1.1.2 mrg const void *target_data) 2734 1.1.1.2 mrg { 2735 1.1.1.2 mrg GOMP_offload_unregister_ver (0, host_table, target_type, target_data); 2736 1.1.1.2 mrg } 2737 1.1.1.2 mrg 2738 1.1 mrg /* This function initializes the target device, specified by DEVICEP. DEVICEP 2739 1.1 mrg must be locked on entry, and remains locked on return. */ 2740 1.1 mrg 2741 1.1 mrg attribute_hidden void 2742 1.1 mrg gomp_init_device (struct gomp_device_descr *devicep) 2743 1.1 mrg { 2744 1.1 mrg int i; 2745 1.1.1.3 mrg if (!devicep->init_device_func (devicep->target_id)) 2746 1.1.1.3 mrg { 2747 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock); 2748 1.1.1.3 mrg gomp_fatal ("device initialization failed"); 2749 1.1.1.3 mrg } 2750 1.1 mrg 2751 1.1 mrg /* Load to device all images registered by the moment. 
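   Images registered later are loaded by GOMP_offload_register_ver itself,
   which walks all already-initialized devices.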
*/ 2752 1.1 mrg for (i = 0; i < num_offload_images; i++) 2753 1.1 mrg { 2754 1.1 mrg struct offload_image_descr *image = &offload_images[i]; 2755 1.1 mrg if (image->type == devicep->type) 2756 1.1.1.2 mrg gomp_load_image_to_device (devicep, image->version, 2757 1.1.1.2 mrg image->host_table, image->target_data, 2758 1.1.1.2 mrg false); 2759 1.1 mrg } 2760 1.1 mrg 2761 1.1.1.8 mrg /* Initialize OpenACC asynchronous queues. */ 2762 1.1.1.8 mrg goacc_init_asyncqueues (devicep); 2763 1.1.1.8 mrg 2764 1.1.1.2 mrg devicep->state = GOMP_DEVICE_INITIALIZED; 2765 1.1.1.2 mrg } 2766 1.1.1.2 mrg 2767 1.1.1.8 mrg /* This function finalizes the target device, specified by DEVICEP. DEVICEP 2768 1.1.1.8 mrg must be locked on entry, and remains locked on return. */ 2769 1.1.1.8 mrg 2770 1.1.1.8 mrg attribute_hidden bool 2771 1.1.1.8 mrg gomp_fini_device (struct gomp_device_descr *devicep) 2772 1.1.1.8 mrg { 2773 1.1.1.8 mrg bool ret = goacc_fini_asyncqueues (devicep); 2774 1.1.1.8 mrg ret &= devicep->fini_device_func (devicep->target_id); 2775 1.1.1.8 mrg devicep->state = GOMP_DEVICE_FINALIZED; 2776 1.1.1.8 mrg return ret; 2777 1.1.1.8 mrg } 2778 1.1.1.8 mrg 2779 1.1.1.2 mrg attribute_hidden void 2780 1.1.1.2 mrg gomp_unload_device (struct gomp_device_descr *devicep) 2781 1.1.1.2 mrg { 2782 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED) 2783 1.1.1.2 mrg { 2784 1.1.1.2 mrg unsigned i; 2785 1.1.1.11 mrg 2786 1.1.1.2 mrg /* Unload from device all images registered at the moment. */ 2787 1.1.1.2 mrg for (i = 0; i < num_offload_images; i++) 2788 1.1.1.2 mrg { 2789 1.1.1.2 mrg struct offload_image_descr *image = &offload_images[i]; 2790 1.1.1.2 mrg if (image->type == devicep->type) 2791 1.1.1.2 mrg gomp_unload_image_from_device (devicep, image->version, 2792 1.1.1.2 mrg image->host_table, 2793 1.1.1.2 mrg image->target_data); 2794 1.1.1.2 mrg } 2795 1.1.1.2 mrg } 2796 1.1 mrg } 2797 1.1 mrg 2798 1.1.1.2 mrg /* Host fallback for GOMP_target{,_ext} routines. */ 2799 1.1 mrg 2800 1.1.1.2 mrg static void 2801 1.1.1.10 mrg gomp_target_fallback (void (*fn) (void *), void **hostaddrs, 2802 1.1.1.10 mrg struct gomp_device_descr *devicep, void **args) 2803 1.1 mrg { 2804 1.1.1.2 mrg struct gomp_thread old_thr, *thr = gomp_thread (); 2805 1.1.1.10 mrg 2806 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY 2807 1.1.1.10 mrg && devicep != NULL) 2808 1.1.1.10 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot " 2809 1.1.1.10 mrg "be used for offloading"); 2810 1.1.1.10 mrg 2811 1.1.1.2 mrg old_thr = *thr; 2812 1.1.1.2 mrg memset (thr, '\0', sizeof (*thr)); 2813 1.1.1.2 mrg if (gomp_places_list) 2814 1.1.1.2 mrg { 2815 1.1.1.2 mrg thr->place = old_thr.place; 2816 1.1.1.2 mrg thr->ts.place_partition_len = gomp_places_list_len; 2817 1.1.1.2 mrg } 2818 1.1.1.10 mrg if (args) 2819 1.1.1.10 mrg while (*args) 2820 1.1.1.10 mrg { 2821 1.1.1.10 mrg intptr_t id = (intptr_t) *args++, val; 2822 1.1.1.10 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) 2823 1.1.1.10 mrg val = (intptr_t) *args++; 2824 1.1.1.10 mrg else 2825 1.1.1.10 mrg val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; 2826 1.1.1.10 mrg if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) 2827 1.1.1.10 mrg continue; 2828 1.1.1.10 mrg id &= GOMP_TARGET_ARG_ID_MASK; 2829 1.1.1.10 mrg if (id != GOMP_TARGET_ARG_THREAD_LIMIT) 2830 1.1.1.10 mrg continue; 2831 1.1.1.10 mrg val = val > INT_MAX ? 
INT_MAX : val; 2832 1.1.1.10 mrg if (val) 2833 1.1.1.10 mrg gomp_icv (true)->thread_limit_var = val; 2834 1.1.1.10 mrg break; 2835 1.1.1.10 mrg } 2836 1.1.1.10 mrg 2837 1.1.1.2 mrg fn (hostaddrs); 2838 1.1.1.2 mrg gomp_free_thread (thr); 2839 1.1.1.2 mrg *thr = old_thr; 2840 1.1 mrg } 2841 1.1 mrg 2842 1.1.1.2 mrg /* Calculate alignment and size requirements of a private copy of data shared 2843 1.1.1.2 mrg as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ 2844 1.1.1.2 mrg 2845 1.1.1.2 mrg static inline void 2846 1.1.1.2 mrg calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, 2847 1.1.1.2 mrg unsigned short *kinds, size_t *tgt_align, 2848 1.1.1.2 mrg size_t *tgt_size) 2849 1.1.1.2 mrg { 2850 1.1.1.2 mrg size_t i; 2851 1.1.1.2 mrg for (i = 0; i < mapnum; i++) 2852 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) 2853 1.1.1.2 mrg { 2854 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8); 2855 1.1.1.2 mrg if (*tgt_align < align) 2856 1.1.1.2 mrg *tgt_align = align; 2857 1.1.1.2 mrg *tgt_size = (*tgt_size + align - 1) & ~(align - 1); 2858 1.1.1.2 mrg *tgt_size += sizes[i]; 2859 1.1.1.2 mrg } 2860 1.1.1.2 mrg } 2861 1.1.1.2 mrg 2862 1.1.1.2 mrg /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ 2863 1.1.1.2 mrg 2864 1.1.1.2 mrg static inline void 2865 1.1.1.2 mrg copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, 2866 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, size_t tgt_align, 2867 1.1.1.2 mrg size_t tgt_size) 2868 1.1.1.2 mrg { 2869 1.1.1.2 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); 2870 1.1.1.2 mrg if (al) 2871 1.1.1.2 mrg tgt += tgt_align - al; 2872 1.1.1.2 mrg tgt_size = 0; 2873 1.1.1.2 mrg size_t i; 2874 1.1.1.2 mrg for (i = 0; i < mapnum; i++) 2875 1.1.1.10 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL) 2876 1.1.1.2 mrg { 2877 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8); 2878 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1); 2879 1.1.1.2 mrg memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); 2880 1.1.1.2 mrg hostaddrs[i] = tgt + tgt_size; 2881 1.1.1.2 mrg tgt_size = tgt_size + sizes[i]; 2882 1.1.1.11 mrg if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH) 2883 1.1.1.11 mrg { 2884 1.1.1.11 mrg *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i]; 2885 1.1.1.11 mrg ++i; 2886 1.1.1.11 mrg } 2887 1.1.1.2 mrg } 2888 1.1.1.2 mrg } 2889 1.1.1.2 mrg 2890 1.1.1.2 mrg /* Helper function of GOMP_target{,_ext} routines. 
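   It resolves the device entry point for HOST_FN: on devices with
   GOMP_OFFLOAD_CAP_NATIVE_EXEC the host address is used directly, otherwise
   it is looked up in the device's memory map, where each function was
   registered at image-load time as a one-byte range [host_fn, host_fn + 1).
   Returns NULL if the device has already been finalized or the function is
   not mapped.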
*/ 2891 1.1.1.2 mrg 2892 1.1.1.2 mrg static void * 2893 1.1.1.2 mrg gomp_get_target_fn_addr (struct gomp_device_descr *devicep, 2894 1.1.1.2 mrg void (*host_fn) (void *)) 2895 1.1.1.2 mrg { 2896 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) 2897 1.1.1.2 mrg return (void *) host_fn; 2898 1.1.1.2 mrg else 2899 1.1.1.2 mrg { 2900 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock); 2901 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED) 2902 1.1.1.2 mrg { 2903 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 2904 1.1.1.2 mrg return NULL; 2905 1.1.1.2 mrg } 2906 1.1.1.2 mrg 2907 1.1.1.2 mrg struct splay_tree_key_s k; 2908 1.1.1.2 mrg k.host_start = (uintptr_t) host_fn; 2909 1.1.1.2 mrg k.host_end = k.host_start + 1; 2910 1.1.1.2 mrg splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); 2911 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 2912 1.1.1.2 mrg if (tgt_fn == NULL) 2913 1.1.1.2 mrg return NULL; 2914 1.1.1.2 mrg 2915 1.1.1.2 mrg return (void *) tgt_fn->tgt_offset; 2916 1.1.1.2 mrg } 2917 1.1.1.2 mrg } 2918 1.1.1.2 mrg 2919 1.1.1.2 mrg /* Called when encountering a target directive. If DEVICE 2920 1.1 mrg is GOMP_DEVICE_ICV, it means use device-var ICV. If it is 2921 1.1 mrg GOMP_DEVICE_HOST_FALLBACK (or any value 2922 1.1 mrg larger than last available hw device), use host fallback. 2923 1.1 mrg FN is address of host code, UNUSED is part of the current ABI, but 2924 1.1 mrg we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays 2925 1.1 mrg with MAPNUM entries, with addresses of the host objects, 2926 1.1 mrg sizes of the host objects (resp. for pointer kind pointer bias 2927 1.1 mrg and assumed sizeof (void *) size) and kinds. */ 2928 1.1 mrg 2929 1.1 mrg void 2930 1.1 mrg GOMP_target (int device, void (*fn) (void *), const void *unused, 2931 1.1 mrg size_t mapnum, void **hostaddrs, size_t *sizes, 2932 1.1 mrg unsigned char *kinds) 2933 1.1 mrg { 2934 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true); 2935 1.1 mrg 2936 1.1.1.2 mrg void *fn_addr; 2937 1.1 mrg if (devicep == NULL 2938 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2939 1.1.1.2 mrg /* All shared memory devices should use the GOMP_target_ext function. */ 2940 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM 2941 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) 2942 1.1.1.10 mrg return gomp_target_fallback (fn, hostaddrs, devicep, NULL); 2943 1.1.1.2 mrg 2944 1.1.1.10 mrg htab_t refcount_set = htab_create (mapnum); 2945 1.1.1.2 mrg struct target_mem_desc *tgt_vars 2946 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, 2947 1.1.1.10 mrg &refcount_set, GOMP_MAP_VARS_TARGET); 2948 1.1.1.2 mrg devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, 2949 1.1.1.2 mrg NULL); 2950 1.1.1.10 mrg htab_clear (refcount_set); 2951 1.1.1.10 mrg gomp_unmap_vars (tgt_vars, true, &refcount_set); 2952 1.1.1.10 mrg htab_free (refcount_set); 2953 1.1.1.2 mrg } 2954 1.1.1.2 mrg 2955 1.1.1.8 mrg static inline unsigned int 2956 1.1.1.8 mrg clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) 2957 1.1.1.8 mrg { 2958 1.1.1.8 mrg /* If we cannot run asynchronously, simply ignore nowait. 
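   Executing the region synchronously is still conforming: nowait merely
   permits, and never requires, asynchronous execution.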
*/ 2959 1.1.1.8 mrg if (devicep != NULL && devicep->async_run_func == NULL) 2960 1.1.1.8 mrg flags &= ~GOMP_TARGET_FLAG_NOWAIT; 2961 1.1.1.8 mrg 2962 1.1.1.8 mrg return flags; 2963 1.1.1.8 mrg } 2964 1.1.1.8 mrg 2965 1.1.1.11 mrg static void 2966 1.1.1.11 mrg gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device) 2967 1.1.1.11 mrg { 2968 1.1.1.11 mrg struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); 2969 1.1.1.11 mrg if (item == NULL) 2970 1.1.1.11 mrg return; 2971 1.1.1.11 mrg 2972 1.1.1.11 mrg void *host_ptr = &item->icvs; 2973 1.1.1.11 mrg void *dev_ptr = omp_get_mapped_ptr (host_ptr, device); 2974 1.1.1.11 mrg if (dev_ptr != NULL) 2975 1.1.1.11 mrg gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr, 2976 1.1.1.11 mrg sizeof (struct gomp_offload_icvs)); 2977 1.1.1.11 mrg } 2978 1.1.1.11 mrg 2979 1.1.1.2 mrg /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, 2980 1.1.1.2 mrg and several arguments have been added: 2981 1.1.1.2 mrg FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. 2982 1.1.1.2 mrg DEPEND is an array of dependencies, see GOMP_task for details. 2983 1.1.1.2 mrg 2984 1.1.1.2 mrg ARGS is a pointer to an array consisting of a variable number of both 2985 1.1.1.2 mrg device-independent and device-specific arguments, which can take one or two 2986 1.1.1.2 mrg elements where the first specifies for which device it is intended, the type 2987 1.1.1.2 mrg and optionally also the value. If the value is not present in the first 2988 1.1.1.2 mrg one, the whole second element contains the actual value. The last element of the 2989 1.1.1.2 mrg array is a single NULL. Among the device-independent ones are, for example, 2990 1.1.1.2 mrg NUM_TEAMS and THREAD_LIMIT. 2991 1.1.1.2 mrg 2992 1.1.1.2 mrg NUM_TEAMS is positive if GOMP_teams will be called in the body with 2993 1.1.1.2 mrg that value, or 1 if the teams construct is not present, or 0, if the 2994 1.1.1.2 mrg teams construct does not have a num_teams clause and so the choice is 2995 1.1.1.2 mrg implementation defined, and -1 if it can't be determined on the host 2996 1.1.1.2 mrg what value GOMP_teams will have on the device. 2997 1.1.1.2 mrg THREAD_LIMIT similarly is positive if GOMP_teams will be called in the 2998 1.1.1.2 mrg body with that value, or 0, if the teams construct does not have a thread_limit 2999 1.1.1.2 mrg clause or the teams construct is not present, or -1 if it can't be 3000 1.1.1.2 mrg determined on the host what value GOMP_teams will have on the device. */ 3001 1.1.1.2 mrg 3002 1.1.1.2 mrg void 3003 1.1.1.2 mrg GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, 3004 1.1.1.2 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds, 3005 1.1.1.2 mrg unsigned int flags, void **depend, void **args) 3006 1.1.1.2 mrg { 3007 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true); 3008 1.1.1.2 mrg size_t tgt_align = 0, tgt_size = 0; 3009 1.1.1.2 mrg bool fpc_done = false; 3010 1.1.1.2 mrg 3011 1.1.1.11 mrg /* Obtain the original TEAMS and THREADS values from ARGS.
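   The loop below decodes the argument encoding shared with the fallback
   path above.  As a non-normative sketch inferred from that decoding: a
   one-element argument packs its value into the upper bits, roughly

     (void *) ((val << GOMP_TARGET_ARG_VALUE_SHIFT)
               | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS)

   whereas an argument whose first element has
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set carries the value verbatim in a
   second element.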
*/ 3012 1.1.1.11 mrg intptr_t orig_teams = 1, orig_threads = 0; 3013 1.1.1.11 mrg size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1; 3014 1.1.1.11 mrg void **tmpargs = args; 3015 1.1.1.11 mrg while (*tmpargs) 3016 1.1.1.11 mrg { 3017 1.1.1.11 mrg intptr_t id = (intptr_t) *tmpargs++, val; 3018 1.1.1.11 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) 3019 1.1.1.11 mrg { 3020 1.1.1.11 mrg val = (intptr_t) *tmpargs++; 3021 1.1.1.11 mrg len = 2; 3022 1.1.1.11 mrg } 3023 1.1.1.11 mrg else 3024 1.1.1.11 mrg { 3025 1.1.1.11 mrg val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; 3026 1.1.1.11 mrg len = 1; 3027 1.1.1.11 mrg } 3028 1.1.1.11 mrg num_args += len; 3029 1.1.1.11 mrg if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) 3030 1.1.1.11 mrg continue; 3031 1.1.1.11 mrg val = val > INT_MAX ? INT_MAX : val; 3032 1.1.1.11 mrg if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS) 3033 1.1.1.11 mrg { 3034 1.1.1.11 mrg orig_teams = val; 3035 1.1.1.11 mrg teams_len = len; 3036 1.1.1.11 mrg } 3037 1.1.1.11 mrg else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT) 3038 1.1.1.11 mrg { 3039 1.1.1.11 mrg orig_threads = val; 3040 1.1.1.11 mrg threads_len = len; 3041 1.1.1.11 mrg } 3042 1.1.1.11 mrg } 3043 1.1.1.11 mrg 3044 1.1.1.11 mrg intptr_t new_teams = orig_teams, new_threads = orig_threads; 3045 1.1.1.11 mrg /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1. 3046 1.1.1.11 mrg ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the 3047 1.1.1.11 mrg value could not be determined. No change. 3048 1.1.1.11 mrg ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause. 3049 1.1.1.11 mrg Set device-specific value. 3050 1.1.1.11 mrg ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause. 3051 1.1.1.11 mrg No change. */ 3052 1.1.1.11 mrg if (orig_teams == -2) 3053 1.1.1.11 mrg new_teams = 1; 3054 1.1.1.11 mrg else if (orig_teams == 0) 3055 1.1.1.11 mrg { 3056 1.1.1.11 mrg struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); 3057 1.1.1.11 mrg if (item != NULL) 3058 1.1.1.11 mrg new_teams = item->icvs.nteams; 3059 1.1.1.11 mrg } 3060 1.1.1.11 mrg /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS 3061 1.1.1.11 mrg region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by 3062 1.1.1.11 mrg e.g. a THREAD_LIMIT clause. */ 3063 1.1.1.11 mrg if (orig_teams > -2 && orig_threads == 0) 3064 1.1.1.11 mrg { 3065 1.1.1.11 mrg struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); 3066 1.1.1.11 mrg if (item != NULL) 3067 1.1.1.11 mrg new_threads = item->icvs.teams_thread_limit; 3068 1.1.1.11 mrg } 3069 1.1.1.11 mrg 3070 1.1.1.11 mrg /* Copy and change the arguments list only if TEAMS or THREADS need to be 3071 1.1.1.11 mrg updated. */ 3072 1.1.1.11 mrg void **new_args = args; 3073 1.1.1.11 mrg if (orig_teams != new_teams || orig_threads != new_threads) 3074 1.1.1.11 mrg { 3075 1.1.1.11 mrg size_t tms_len = (orig_teams == new_teams 3076 1.1.1.11 mrg ? teams_len 3077 1.1.1.11 mrg : (new_teams > -(1 << 15) && new_teams < (1 << 15) 3078 1.1.1.11 mrg ? 1 : 2)); 3079 1.1.1.11 mrg size_t ths_len = (orig_threads == new_threads 3080 1.1.1.11 mrg ? threads_len 3081 1.1.1.11 mrg : (new_threads > -(1 << 15) && new_threads < (1 << 15) 3082 1.1.1.11 mrg ? 1 : 2)); 3083 1.1.1.11 mrg /* One additional item after the last arg must be NULL. 
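   NEW_ARGS_CNT thus replaces the old TEAMS/THREADS entries
   (TEAMS_LEN/THREADS_LEN elements) with their new encodings
   (TMS_LEN/THS_LEN elements): a value in the signed 16-bit range can be
   packed into a single element (it survives the shift by
   GOMP_TARGET_ARG_VALUE_SHIFT even where intptr_t is 32 bits), anything
   larger needs the two-element form.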
*/ 3084 1.1.1.11 mrg size_t new_args_cnt = num_args - teams_len - threads_len + tms_len 3085 1.1.1.11 mrg + ths_len + 1; 3086 1.1.1.11 mrg new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*)); 3087 1.1.1.11 mrg 3088 1.1.1.11 mrg tmpargs = args; 3089 1.1.1.11 mrg void **tmp_new_args = new_args; 3090 1.1.1.11 mrg /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied 3091 1.1.1.11 mrg too if they have not been changed and skipped otherwise. */ 3092 1.1.1.11 mrg while (*tmpargs) 3093 1.1.1.11 mrg { 3094 1.1.1.11 mrg intptr_t id = (intptr_t) *tmpargs; 3095 1.1.1.11 mrg if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS 3096 1.1.1.11 mrg && orig_teams != new_teams) 3097 1.1.1.11 mrg || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT 3098 1.1.1.11 mrg && orig_threads != new_threads)) 3099 1.1.1.11 mrg { 3100 1.1.1.11 mrg tmpargs++; 3101 1.1.1.11 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) 3102 1.1.1.11 mrg tmpargs++; 3103 1.1.1.11 mrg } 3104 1.1.1.11 mrg else 3105 1.1.1.11 mrg { 3106 1.1.1.11 mrg *tmp_new_args++ = *tmpargs++; 3107 1.1.1.11 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) 3108 1.1.1.11 mrg *tmp_new_args++ = *tmpargs++; 3109 1.1.1.11 mrg } 3110 1.1.1.11 mrg } 3111 1.1.1.11 mrg 3112 1.1.1.11 mrg /* Add the new TEAMS arg to the new args list if it has been changed. */ 3113 1.1.1.11 mrg if (orig_teams != new_teams) 3114 1.1.1.11 mrg { 3115 1.1.1.11 mrg intptr_t new_val = new_teams; 3116 1.1.1.11 mrg if (tms_len == 1) 3117 1.1.1.11 mrg { 3118 1.1.1.11 mrg new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) 3119 1.1.1.11 mrg | GOMP_TARGET_ARG_NUM_TEAMS; 3120 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val; 3121 1.1.1.11 mrg } 3122 1.1.1.11 mrg else 3123 1.1.1.11 mrg { 3124 1.1.1.11 mrg *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM 3125 1.1.1.11 mrg | GOMP_TARGET_ARG_NUM_TEAMS); 3126 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val; 3127 1.1.1.11 mrg } 3128 1.1.1.11 mrg } 3129 1.1.1.11 mrg 3130 1.1.1.11 mrg /* Add the new THREADS arg to the new args list if it has been changed. */ 3131 1.1.1.11 mrg if (orig_threads != new_threads) 3132 1.1.1.11 mrg { 3133 1.1.1.11 mrg intptr_t new_val = new_threads; 3134 1.1.1.11 mrg if (ths_len == 1) 3135 1.1.1.11 mrg { 3136 1.1.1.11 mrg new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) 3137 1.1.1.11 mrg | GOMP_TARGET_ARG_THREAD_LIMIT; 3138 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val; 3139 1.1.1.11 mrg } 3140 1.1.1.11 mrg else 3141 1.1.1.11 mrg { 3142 1.1.1.11 mrg *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM 3143 1.1.1.11 mrg | GOMP_TARGET_ARG_THREAD_LIMIT); 3144 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val; 3145 1.1.1.11 mrg } 3146 1.1.1.11 mrg } 3147 1.1.1.11 mrg 3148 1.1.1.11 mrg *tmp_new_args = NULL; 3149 1.1.1.11 mrg } 3150 1.1.1.11 mrg 3151 1.1.1.8 mrg flags = clear_unsupported_flags (devicep, flags); 3152 1.1.1.8 mrg 3153 1.1.1.2 mrg if (flags & GOMP_TARGET_FLAG_NOWAIT) 3154 1.1 mrg { 3155 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread (); 3156 1.1.1.2 mrg /* Create a team if we don't have any around, as nowait 3157 1.1.1.2 mrg target tasks make sense to run asynchronously even when 3158 1.1.1.2 mrg outside of any parallel. */ 3159 1.1.1.2 mrg if (__builtin_expect (thr->ts.team == NULL, 0)) 3160 1.1.1.2 mrg { 3161 1.1.1.2 mrg struct gomp_team *team = gomp_new_team (1); 3162 1.1.1.2 mrg struct gomp_task *task = thr->task; 3163 1.1.1.9 mrg struct gomp_task **implicit_task = &task; 3164 1.1.1.2 mrg struct gomp_task_icv *icv = task ? 
&task->icv : &gomp_global_icv; 3165 1.1.1.2 mrg team->prev_ts = thr->ts; 3166 1.1.1.2 mrg thr->ts.team = team; 3167 1.1.1.2 mrg thr->ts.team_id = 0; 3168 1.1.1.2 mrg thr->ts.work_share = &team->work_shares[0]; 3169 1.1.1.2 mrg thr->ts.last_work_share = NULL; 3170 1.1.1.2 mrg #ifdef HAVE_SYNC_BUILTINS 3171 1.1.1.2 mrg thr->ts.single_count = 0; 3172 1.1.1.2 mrg #endif 3173 1.1.1.2 mrg thr->ts.static_trip = 0; 3174 1.1.1.2 mrg thr->task = &team->implicit_task[0]; 3175 1.1.1.2 mrg gomp_init_task (thr->task, NULL, icv); 3176 1.1.1.9 mrg while (*implicit_task 3177 1.1.1.9 mrg && (*implicit_task)->kind != GOMP_TASK_IMPLICIT) 3178 1.1.1.9 mrg implicit_task = &(*implicit_task)->parent; 3179 1.1.1.9 mrg if (*implicit_task) 3180 1.1.1.2 mrg { 3181 1.1.1.9 mrg thr->task = *implicit_task; 3182 1.1.1.2 mrg gomp_end_task (); 3183 1.1.1.9 mrg free (*implicit_task); 3184 1.1.1.2 mrg thr->task = &team->implicit_task[0]; 3185 1.1.1.2 mrg } 3186 1.1.1.2 mrg else 3187 1.1.1.2 mrg pthread_setspecific (gomp_thread_destructor, thr); 3188 1.1.1.9 mrg if (implicit_task != &task) 3189 1.1.1.9 mrg { 3190 1.1.1.9 mrg *implicit_task = thr->task; 3191 1.1.1.9 mrg thr->task = task; 3192 1.1.1.9 mrg } 3193 1.1.1.2 mrg } 3194 1.1.1.2 mrg if (thr->ts.team 3195 1.1.1.2 mrg && !thr->task->final_task) 3196 1.1.1.2 mrg { 3197 1.1.1.2 mrg gomp_create_target_task (devicep, fn, mapnum, hostaddrs, 3198 1.1.1.11 mrg sizes, kinds, flags, depend, new_args, 3199 1.1.1.2 mrg GOMP_TARGET_TASK_BEFORE_MAP); 3200 1.1.1.2 mrg return; 3201 1.1.1.2 mrg } 3202 1.1 mrg } 3203 1.1 mrg 3204 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present 3205 1.1.1.2 mrg (or we are in a final task), block the parent task until the 3206 1.1.1.2 mrg dependencies are resolved and then just continue with the rest 3207 1.1.1.2 mrg of the function as if it is a merged task. */ 3208 1.1.1.2 mrg if (depend != NULL) 3209 1.1.1.2 mrg { 3210 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread (); 3211 1.1.1.2 mrg if (thr->task && thr->task->depend_hash) 3212 1.1.1.2 mrg { 3213 1.1.1.2 mrg /* If we might need to wait, copy firstprivate now. 
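   Firstprivate must capture the values current when the construct is
   encountered, and gomp_task_maybe_wait_for_dependencies below may run
   other tasks that modify the originals; copy_firstprivate_data also
   redirects the affected HOSTADDRS entries into the snapshot, so the
   copies are what gets mapped later.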
*/ 3214 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds, 3215 1.1.1.2 mrg &tgt_align, &tgt_size); 3216 1.1.1.2 mrg if (tgt_align) 3217 1.1.1.2 mrg { 3218 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1); 3219 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, 3220 1.1.1.2 mrg tgt_align, tgt_size); 3221 1.1.1.2 mrg } 3222 1.1.1.2 mrg fpc_done = true; 3223 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend); 3224 1.1.1.2 mrg } 3225 1.1.1.2 mrg } 3226 1.1 mrg 3227 1.1 mrg void *fn_addr; 3228 1.1.1.2 mrg if (devicep == NULL 3229 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3230 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) 3231 1.1.1.2 mrg || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) 3232 1.1 mrg { 3233 1.1.1.2 mrg if (!fpc_done) 3234 1.1 mrg { 3235 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds, 3236 1.1.1.2 mrg &tgt_align, &tgt_size); 3237 1.1.1.2 mrg if (tgt_align) 3238 1.1.1.2 mrg { 3239 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1); 3240 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, 3241 1.1.1.2 mrg tgt_align, tgt_size); 3242 1.1.1.2 mrg } 3243 1.1 mrg } 3244 1.1.1.11 mrg gomp_target_fallback (fn, hostaddrs, devicep, new_args); 3245 1.1.1.2 mrg return; 3246 1.1.1.2 mrg } 3247 1.1 mrg 3248 1.1.1.2 mrg struct target_mem_desc *tgt_vars; 3249 1.1.1.10 mrg htab_t refcount_set = NULL; 3250 1.1.1.10 mrg 3251 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3252 1.1.1.2 mrg { 3253 1.1.1.2 mrg if (!fpc_done) 3254 1.1.1.2 mrg { 3255 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds, 3256 1.1.1.2 mrg &tgt_align, &tgt_size); 3257 1.1.1.2 mrg if (tgt_align) 3258 1.1.1.2 mrg { 3259 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1); 3260 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, 3261 1.1.1.2 mrg tgt_align, tgt_size); 3262 1.1.1.2 mrg } 3263 1.1.1.2 mrg } 3264 1.1.1.2 mrg tgt_vars = NULL; 3265 1.1 mrg } 3266 1.1.1.2 mrg else 3267 1.1.1.10 mrg { 3268 1.1.1.10 mrg refcount_set = htab_create (mapnum); 3269 1.1.1.10 mrg tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, 3270 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_TARGET); 3271 1.1.1.10 mrg } 3272 1.1.1.2 mrg devicep->run_func (devicep->target_id, fn_addr, 3273 1.1.1.2 mrg tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, 3274 1.1.1.11 mrg new_args); 3275 1.1.1.2 mrg if (tgt_vars) 3276 1.1.1.10 mrg { 3277 1.1.1.10 mrg htab_clear (refcount_set); 3278 1.1.1.10 mrg gomp_unmap_vars (tgt_vars, true, &refcount_set); 3279 1.1.1.10 mrg } 3280 1.1.1.10 mrg if (refcount_set) 3281 1.1.1.10 mrg htab_free (refcount_set); 3282 1.1.1.11 mrg 3283 1.1.1.11 mrg /* Copy back ICVs from device to host. 3284 1.1.1.11 mrg HOST_PTR is expected to exist since it was added in 3285 1.1.1.11 mrg gomp_load_image_to_device if not already available. */ 3286 1.1.1.11 mrg gomp_copy_back_icvs (devicep, device); 3287 1.1.1.11 mrg 3288 1.1.1.2 mrg } 3289 1.1 mrg 3290 1.1.1.2 mrg 3291 1.1.1.11 mrg /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid 3292 1.1.1.11 mrg keeping track of all variable handling - assuming that reverse offload occurs 3293 1.1.1.11 mrg only very rarely. The downside is that the reverse search is slow.
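   Concretely, given a device range [TGT_START, TGT_END), the helpers below
   visit every key in the device's memory map and test
   key->tgt->tgt_start + key->tgt_offset .. + length for overlap with it:
   an O(n) scan instead of the usual O(log n) keyed lookup.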
*/ 3294 1.1.1.11 mrg 3295 1.1.1.11 mrg struct gomp_splay_tree_rev_lookup_data { 3296 1.1.1.11 mrg uintptr_t tgt_start; 3297 1.1.1.11 mrg uintptr_t tgt_end; 3298 1.1.1.11 mrg splay_tree_key key; 3299 1.1.1.11 mrg }; 3300 1.1.1.11 mrg 3301 1.1.1.11 mrg static int 3302 1.1.1.11 mrg gomp_splay_tree_rev_lookup (splay_tree_key key, void *d) 3303 1.1.1.2 mrg { 3304 1.1.1.11 mrg struct gomp_splay_tree_rev_lookup_data *data; 3305 1.1.1.11 mrg data = (struct gomp_splay_tree_rev_lookup_data *)d; 3306 1.1.1.11 mrg uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset; 3307 1.1.1.10 mrg 3308 1.1.1.11 mrg if (tgt_start > data->tgt_start || key->tgt->list_count == 0) 3309 1.1.1.11 mrg return 0; 3310 1.1.1.10 mrg 3311 1.1.1.11 mrg size_t j; 3312 1.1.1.11 mrg for (j = 0; j < key->tgt->list_count; j++) 3313 1.1.1.11 mrg if (key->tgt->list[j].key == key) 3314 1.1.1.11 mrg break; 3315 1.1.1.11 mrg assert (j < key->tgt->list_count); 3316 1.1.1.11 mrg uintptr_t tgt_end = tgt_start + key->tgt->list[j].length; 3317 1.1.1.11 mrg 3318 1.1.1.11 mrg if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end) 3319 1.1.1.11 mrg || (tgt_end > data->tgt_start && tgt_start < data->tgt_end)) 3320 1.1 mrg { 3321 1.1.1.11 mrg data->key = key; 3322 1.1.1.11 mrg return 1; 3323 1.1 mrg } 3324 1.1.1.11 mrg return 0; 3325 1.1 mrg } 3326 1.1 mrg 3327 1.1.1.11 mrg static inline splay_tree_key 3328 1.1.1.11 mrg gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end, 3329 1.1.1.11 mrg bool zero_len) 3330 1.1 mrg { 3331 1.1.1.11 mrg struct gomp_splay_tree_rev_lookup_data data; 3332 1.1.1.11 mrg data.key = NULL; 3333 1.1.1.11 mrg data.tgt_start = tgt_start; 3334 1.1.1.11 mrg data.tgt_end = tgt_end; 3335 1.1 mrg 3336 1.1.1.11 mrg if (tgt_start != tgt_end) 3337 1.1.1.11 mrg { 3338 1.1.1.11 mrg splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); 3339 1.1.1.11 mrg return data.key; 3340 1.1.1.11 mrg } 3341 1.1 mrg 3342 1.1.1.11 mrg data.tgt_end++; 3343 1.1.1.11 mrg splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); 3344 1.1.1.11 mrg if (data.key != NULL || zero_len) 3345 1.1.1.11 mrg return data.key; 3346 1.1.1.11 mrg data.tgt_end--; 3347 1.1.1.11 mrg 3348 1.1.1.11 mrg data.tgt_start--; 3349 1.1.1.11 mrg splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data); 3350 1.1.1.11 mrg return data.key; 3351 1.1.1.2 mrg } 3352 1.1.1.2 mrg 3353 1.1.1.11 mrg struct cpy_data 3354 1.1.1.2 mrg { 3355 1.1.1.11 mrg uint64_t devaddr; 3356 1.1.1.11 mrg bool present, aligned; 3357 1.1.1.11 mrg }; 3358 1.1.1.2 mrg 3359 1.1.1.2 mrg 3360 1.1.1.11 mrg /* Search just mapped reverse-offload data; returns index if found, 3361 1.1.1.11 mrg otherwise >= n. */ 3362 1.1.1.11 mrg 3363 1.1.1.11 mrg static inline int 3364 1.1.1.11 mrg gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs, 3365 1.1.1.11 mrg unsigned short *kinds, uint64_t *sizes, size_t n, 3366 1.1.1.11 mrg uint64_t tgt_start, uint64_t tgt_end) 3367 1.1.1.11 mrg { 3368 1.1.1.11 mrg const bool short_mapkind = true; 3369 1.1.1.11 mrg const int typemask = short_mapkind ? 
0xff : 0x7; 3370 1.1.1.11 mrg size_t i; 3371 1.1.1.11 mrg for (i = 0; i < n; i++) 3372 1.1.1.11 mrg { 3373 1.1.1.11 mrg bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask) 3374 1.1.1.11 mrg == GOMP_MAP_STRUCT); 3375 1.1.1.11 mrg uint64_t dev_end; 3376 1.1.1.11 mrg if (!is_struct) 3377 1.1.1.11 mrg dev_end = d[i].devaddr + sizes[i]; 3378 1.1.1.11 mrg else 3379 1.1.1.11 mrg { 3380 1.1.1.11 mrg if (i + sizes[i] < n) 3381 1.1.1.11 mrg dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]]; 3382 1.1.1.11 mrg else 3383 1.1.1.11 mrg dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]]; 3384 1.1.1.11 mrg } 3385 1.1.1.11 mrg if ((d[i].devaddr == tgt_start && dev_end == tgt_end) 3386 1.1.1.11 mrg || (dev_end > tgt_start && d[i].devaddr < tgt_end)) 3387 1.1.1.11 mrg break; 3388 1.1.1.11 mrg if (is_struct) 3389 1.1.1.11 mrg i += sizes[i]; 3390 1.1.1.11 mrg } 3391 1.1.1.11 mrg return i; 3392 1.1.1.11 mrg } 3393 1.1.1.11 mrg 3394 1.1.1.11 mrg static inline int 3395 1.1.1.11 mrg gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs, 3396 1.1.1.11 mrg unsigned short *kinds, uint64_t *sizes, 3397 1.1.1.11 mrg size_t n, uint64_t tgt_start, uint64_t tgt_end, 3398 1.1.1.11 mrg bool zero_len) 3399 1.1.1.11 mrg { 3400 1.1.1.11 mrg size_t i; 3401 1.1.1.11 mrg if (tgt_start != tgt_end) 3402 1.1.1.11 mrg return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n, 3403 1.1.1.11 mrg tgt_start, tgt_end); 3404 1.1.1.11 mrg tgt_end++; 3405 1.1.1.11 mrg i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n, 3406 1.1.1.11 mrg tgt_start, tgt_end); 3407 1.1.1.11 mrg if (i < n || zero_len) 3408 1.1.1.11 mrg return i; 3409 1.1.1.11 mrg tgt_end--; 3410 1.1.1.11 mrg 3411 1.1.1.11 mrg tgt_start--; 3412 1.1.1.11 mrg return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n, 3413 1.1.1.11 mrg tgt_start, tgt_end); 3414 1.1.1.11 mrg } 3415 1.1.1.11 mrg 3416 1.1.1.11 mrg /* Handle reverse offload. This is called by the device plugins for a 3417 1.1.1.11 mrg reverse offload; it is not called if the outer target runs on the host. 3418 1.1.1.11 mrg The mapping is simplified: device-affecting constructs (except for target 3419 1.1.1.11 mrg with device(ancestor:1)) must not be encountered; in particular not 3420 1.1.1.11 mrg target (enter/exit) data. */ 3421 1.1.1.11 mrg 3422 1.1.1.11 mrg void 3423 1.1.1.11 mrg gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, 3424 1.1.1.11 mrg uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num, 3425 1.1.1.11 mrg struct goacc_asyncqueue *aq) 3426 1.1.1.11 mrg { 3427 1.1.1.11 mrg /* Return early if there is no offload code. */ 3428 1.1.1.11 mrg if (sizeof (OFFLOAD_PLUGINS) == sizeof ("")) 3429 1.1.1.11 mrg return; 3430 1.1.1.11 mrg /* Currently, this fails because of calculate_firstprivate_requirements 3431 1.1.1.11 mrg below; it could be fixed but additional code needs to be updated to 3432 1.1.1.11 mrg handle 32bit hosts - thus, it is not worthwhile. */ 3433 1.1.1.11 mrg if (sizeof (void *) != sizeof (uint64_t)) 3434 1.1.1.11 mrg gomp_fatal ("Reverse offload of 32bit hosts not supported."); 3435 1.1.1.11 mrg 3436 1.1.1.11 mrg struct cpy_data *cdata = NULL; 3437 1.1.1.11 mrg uint64_t *devaddrs; 3438 1.1.1.11 mrg uint64_t *sizes; 3439 1.1.1.11 mrg unsigned short *kinds; 3440 1.1.1.11 mrg const bool short_mapkind = true; 3441 1.1.1.11 mrg const int typemask = short_mapkind ?
0xff : 0x7; 3442 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (dev_num, false); 3443 1.1.1.11 mrg 3444 1.1.1.11 mrg reverse_splay_tree_key n; 3445 1.1.1.11 mrg struct reverse_splay_tree_key_s k; 3446 1.1.1.11 mrg k.dev = fn_ptr; 3447 1.1.1.11 mrg 3448 1.1.1.11 mrg gomp_mutex_lock (&devicep->lock); 3449 1.1.1.11 mrg n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k); 3450 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 3451 1.1.1.11 mrg 3452 1.1.1.11 mrg if (n == NULL) 3453 1.1.1.11 mrg gomp_fatal ("Cannot find reverse-offload function"); 3454 1.1.1.11 mrg void (*host_fn) (void *) = (void (*) (void *)) n->k->host_start; 3455 1.1.1.11 mrg 3456 1.1.1.11 mrg if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0) 3457 1.1.1.11 mrg { 3458 1.1.1.11 mrg devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr; 3459 1.1.1.11 mrg sizes = (uint64_t *) (uintptr_t) sizes_ptr; 3460 1.1.1.11 mrg kinds = (unsigned short *) (uintptr_t) kinds_ptr; 3461 1.1.1.11 mrg } 3462 1.1.1.11 mrg else 3463 1.1.1.11 mrg { 3464 1.1.1.11 mrg devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t)); 3465 1.1.1.11 mrg sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t)); 3466 1.1.1.11 mrg kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short)); 3467 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, devaddrs, 3468 1.1.1.11 mrg (const void *) (uintptr_t) devaddrs_ptr, 3469 1.1.1.11 mrg mapnum * sizeof (uint64_t)); 3470 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, sizes, 3471 1.1.1.11 mrg (const void *) (uintptr_t) sizes_ptr, 3472 1.1.1.11 mrg mapnum * sizeof (uint64_t)); 3473 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, kinds, 3474 1.1.1.11 mrg (const void *) (uintptr_t) kinds_ptr, 3475 1.1.1.11 mrg mapnum * sizeof (unsigned short)); 3476 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq)) 3477 1.1.1.11 mrg exit (EXIT_FAILURE); 3478 1.1.1.11 mrg } 3479 1.1.1.11 mrg 3480 1.1.1.11 mrg size_t tgt_align = 0, tgt_size = 0; 3481 1.1.1.11 mrg 3482 1.1.1.11 mrg /* If actually executed on 32bit systems, the casts lead to wrong code; 3483 1.1.1.11 mrg but 32bit with offloading is not supported; see top of this function. 
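   (SIZES is a uint64_t array here but is passed as if it were the size_t
   array that calculate_firstprivate_requirements expects; the layouts
   agree only when size_t is 64 bits wide.)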
*/ 3484 1.1.1.11 mrg calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes, 3485 1.1.1.11 mrg (void *) (uintptr_t) kinds, 3486 1.1.1.11 mrg &tgt_align, &tgt_size); 3487 1.1.1.11 mrg 3488 1.1.1.11 mrg if (tgt_align) 3489 1.1.1.11 mrg { 3490 1.1.1.11 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1); 3491 1.1.1.11 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); 3492 1.1.1.11 mrg if (al) 3493 1.1.1.11 mrg tgt += tgt_align - al; 3494 1.1.1.11 mrg tgt_size = 0; 3495 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++) 3496 1.1.1.11 mrg if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE 3497 1.1.1.11 mrg && devaddrs[i] != 0) 3498 1.1.1.11 mrg { 3499 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8); 3500 1.1.1.11 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1); 3501 1.1.1.11 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3502 1.1.1.11 mrg memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i], 3503 1.1.1.11 mrg (size_t) sizes[i]); 3504 1.1.1.11 mrg else 3505 1.1.1.11 mrg { 3506 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, tgt + tgt_size, 3507 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i], 3508 1.1.1.11 mrg (size_t) sizes[i]); 3509 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq)) 3510 1.1.1.11 mrg exit (EXIT_FAILURE); 3511 1.1.1.11 mrg } 3512 1.1.1.11 mrg devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size; 3513 1.1.1.11 mrg tgt_size = tgt_size + sizes[i]; 3514 1.1.1.11 mrg if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3515 1.1.1.11 mrg && i + 1 < mapnum 3516 1.1.1.11 mrg && ((get_kind (short_mapkind, kinds, i) & typemask) 3517 1.1.1.11 mrg == GOMP_MAP_ATTACH)) 3518 1.1.1.11 mrg { 3519 1.1.1.11 mrg *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1]) 3520 1.1.1.11 mrg = (uint64_t) devaddrs[i]; 3521 1.1.1.11 mrg ++i; 3522 1.1.1.11 mrg } 3523 1.1.1.11 mrg } 3524 1.1.1.11 mrg } 3525 1.1.1.11 mrg 3526 1.1.1.11 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0) 3527 1.1.1.11 mrg { 3528 1.1.1.11 mrg size_t j, struct_cpy = 0; 3529 1.1.1.11 mrg splay_tree_key n2; 3530 1.1.1.11 mrg cdata = gomp_alloca (sizeof (*cdata) * mapnum); 3531 1.1.1.11 mrg memset (cdata, '\0', sizeof (*cdata) * mapnum); 3532 1.1.1.11 mrg gomp_mutex_lock (&devicep->lock); 3533 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++) 3534 1.1.1.11 mrg { 3535 1.1.1.11 mrg if (devaddrs[i] == 0) 3536 1.1.1.11 mrg continue; 3537 1.1.1.11 mrg n = NULL; 3538 1.1.1.11 mrg int kind = get_kind (short_mapkind, kinds, i) & typemask; 3539 1.1.1.11 mrg switch (kind) 3540 1.1.1.11 mrg { 3541 1.1.1.11 mrg case GOMP_MAP_FIRSTPRIVATE: 3542 1.1.1.11 mrg case GOMP_MAP_FIRSTPRIVATE_INT: 3543 1.1.1.11 mrg continue; 3544 1.1.1.11 mrg 3545 1.1.1.11 mrg case GOMP_MAP_DELETE: 3546 1.1.1.11 mrg case GOMP_MAP_RELEASE: 3547 1.1.1.11 mrg case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: 3548 1.1.1.11 mrg /* Assume it is present; look it up - but ignore unless the 3549 1.1.1.11 mrg present clause is there. 
*/ 3550 1.1.1.11 mrg case GOMP_MAP_ALLOC: 3551 1.1.1.11 mrg case GOMP_MAP_FROM: 3552 1.1.1.11 mrg case GOMP_MAP_FORCE_ALLOC: 3553 1.1.1.11 mrg case GOMP_MAP_FORCE_FROM: 3554 1.1.1.11 mrg case GOMP_MAP_ALWAYS_FROM: 3555 1.1.1.11 mrg case GOMP_MAP_TO: 3556 1.1.1.11 mrg case GOMP_MAP_TOFROM: 3557 1.1.1.11 mrg case GOMP_MAP_FORCE_TO: 3558 1.1.1.11 mrg case GOMP_MAP_FORCE_TOFROM: 3559 1.1.1.11 mrg case GOMP_MAP_ALWAYS_TO: 3560 1.1.1.11 mrg case GOMP_MAP_ALWAYS_TOFROM: 3561 1.1.1.11 mrg case GOMP_MAP_FORCE_PRESENT: 3562 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_FROM: 3563 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TO: 3564 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TOFROM: 3565 1.1.1.11 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: 3566 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i]; 3567 1.1.1.11 mrg bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION 3568 1.1.1.11 mrg || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION); 3569 1.1.1.11 mrg j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i, 3570 1.1.1.11 mrg devaddrs[i], 3571 1.1.1.11 mrg devaddrs[i] + sizes[i], zero_len); 3572 1.1.1.11 mrg if (j < i) 3573 1.1.1.11 mrg { 3574 1.1.1.11 mrg n2 = NULL; 3575 1.1.1.11 mrg cdata[i].present = true; 3576 1.1.1.11 mrg devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr; 3577 1.1.1.11 mrg } 3578 1.1.1.11 mrg else 3579 1.1.1.11 mrg { 3580 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map, 3581 1.1.1.11 mrg devaddrs[i], 3582 1.1.1.11 mrg devaddrs[i] + sizes[i], zero_len); 3583 1.1.1.11 mrg cdata[i].present = n2 != NULL; 3584 1.1.1.11 mrg } 3585 1.1.1.11 mrg if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind)) 3586 1.1.1.11 mrg { 3587 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 3588 1.1.1.11 mrg #ifdef HAVE_INTTYPES_H 3589 1.1.1.11 mrg gomp_fatal ("present clause: no corresponding data on " 3590 1.1.1.11 mrg "parent device at %p with size %"PRIu64, 3591 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i], 3592 1.1.1.11 mrg (uint64_t) sizes[i]); 3593 1.1.1.11 mrg #else 3594 1.1.1.11 mrg gomp_fatal ("present clause: no corresponding data on " 3595 1.1.1.11 mrg "parent device at %p with size %lu", 3596 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i], 3597 1.1.1.11 mrg (unsigned long) sizes[i]); 3598 1.1.1.11 mrg #endif 3599 1.1.1.11 mrg break; 3600 1.1.1.11 mrg } 3601 1.1.1.11 mrg else if (!cdata[i].present 3602 1.1.1.11 mrg && kind != GOMP_MAP_DELETE 3603 1.1.1.11 mrg && kind != GOMP_MAP_RELEASE 3604 1.1.1.11 mrg && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) 3605 1.1.1.11 mrg { 3606 1.1.1.11 mrg cdata[i].aligned = true; 3607 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8); 3608 1.1.1.11 mrg devaddrs[i] 3609 1.1.1.11 mrg = (uint64_t) (uintptr_t) gomp_aligned_alloc (align, 3610 1.1.1.11 mrg sizes[i]); 3611 1.1.1.11 mrg } 3612 1.1.1.11 mrg else if (n2 != NULL) 3613 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr 3614 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset)); 3615 1.1.1.11 mrg if (((!cdata[i].present || struct_cpy) 3616 1.1.1.11 mrg && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM)) 3617 1.1.1.11 mrg || kind == GOMP_MAP_FORCE_TO 3618 1.1.1.11 mrg || kind == GOMP_MAP_FORCE_TOFROM 3619 1.1.1.11 mrg || GOMP_MAP_ALWAYS_TO_P (kind)) 3620 1.1.1.11 mrg { 3621 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, 3622 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i], 3623 1.1.1.11 mrg (void *) (uintptr_t) cdata[i].devaddr, 3624 1.1.1.11 mrg sizes[i]); 3625 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq)) 3626 1.1.1.11 mrg { 3627 1.1.1.11 mrg 
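/* Synchronizing with the async queue failed; drop the device lock before exiting.  */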
gomp_mutex_unlock (&devicep->lock); 3628 1.1.1.11 mrg exit (EXIT_FAILURE); 3629 1.1.1.11 mrg } 3630 1.1.1.11 mrg } 3631 1.1.1.11 mrg if (struct_cpy) 3632 1.1.1.11 mrg struct_cpy--; 3633 1.1.1.11 mrg break; 3634 1.1.1.11 mrg case GOMP_MAP_ATTACH: 3635 1.1.1.11 mrg case GOMP_MAP_POINTER: 3636 1.1.1.11 mrg case GOMP_MAP_ALWAYS_POINTER: 3637 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map, 3638 1.1.1.11 mrg devaddrs[i] + sizes[i], 3639 1.1.1.11 mrg devaddrs[i] + sizes[i] 3640 1.1.1.11 mrg + sizeof (void*), false); 3641 1.1.1.11 mrg cdata[i].present = n2 != NULL; 3642 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i]; 3643 1.1.1.11 mrg if (n2) 3644 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr 3645 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset)); 3646 1.1.1.11 mrg else 3647 1.1.1.11 mrg { 3648 1.1.1.11 mrg j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i, 3649 1.1.1.11 mrg devaddrs[i] + sizes[i], 3650 1.1.1.11 mrg devaddrs[i] + sizes[i] 3651 1.1.1.11 mrg + sizeof (void*), false); 3652 1.1.1.11 mrg if (j < i) 3653 1.1.1.11 mrg { 3654 1.1.1.11 mrg cdata[i].present = true; 3655 1.1.1.11 mrg devaddrs[i] = (devaddrs[j] + devaddrs[i] 3656 1.1.1.11 mrg - cdata[j].devaddr); 3657 1.1.1.11 mrg } 3658 1.1.1.11 mrg } 3659 1.1.1.11 mrg if (!cdata[i].present) 3660 1.1.1.11 mrg devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*)); 3661 1.1.1.11 mrg /* Assume that when present, the pointer is already correct. */ 3662 1.1.1.11 mrg if (!n2) 3663 1.1.1.11 mrg *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i]) 3664 1.1.1.11 mrg = devaddrs[i-1]; 3665 1.1.1.11 mrg break; 3666 1.1.1.11 mrg case GOMP_MAP_TO_PSET: 3667 1.1.1.11 mrg /* Assume that when present, the pointers are fine and no 'to:' 3668 1.1.1.11 mrg is required. */ 3669 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map, 3670 1.1.1.11 mrg devaddrs[i], devaddrs[i] + sizes[i], 3671 1.1.1.11 mrg false); 3672 1.1.1.11 mrg cdata[i].present = n2 != NULL; 3673 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i]; 3674 1.1.1.11 mrg if (n2) 3675 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr 3676 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset)); 3677 1.1.1.11 mrg else 3678 1.1.1.11 mrg { 3679 1.1.1.11 mrg j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i, 3680 1.1.1.11 mrg devaddrs[i], 3681 1.1.1.11 mrg devaddrs[i] + sizes[i], false); 3682 1.1.1.11 mrg if (j < i) 3683 1.1.1.11 mrg { 3684 1.1.1.11 mrg cdata[i].present = true; 3685 1.1.1.11 mrg devaddrs[i] = (devaddrs[j] + devaddrs[i] 3686 1.1.1.11 mrg - cdata[j].devaddr); 3687 1.1.1.11 mrg } 3688 1.1.1.11 mrg } 3689 1.1.1.11 mrg if (!cdata[i].present) 3690 1.1.1.11 mrg { 3691 1.1.1.11 mrg cdata[i].aligned = true; 3692 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8); 3693 1.1.1.11 mrg devaddrs[i] 3694 1.1.1.11 mrg = (uint64_t) (uintptr_t) gomp_aligned_alloc (align, 3695 1.1.1.11 mrg sizes[i]); 3696 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, 3697 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i], 3698 1.1.1.11 mrg (void *) (uintptr_t) cdata[i].devaddr, 3699 1.1.1.11 mrg sizes[i]); 3700 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq)) 3701 1.1.1.11 mrg { 3702 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 3703 1.1.1.11 mrg exit (EXIT_FAILURE); 3704 1.1.1.11 mrg } 3705 1.1.1.11 mrg } 3706 1.1.1.11 mrg for (j = i + 1; j < mapnum; j++) 3707 1.1.1.11 mrg { 3708 1.1.1.11 mrg kind = get_kind (short_mapkind, kinds, j) & typemask; 3709 1.1.1.11 mrg if (!GOMP_MAP_ALWAYS_POINTER_P (kind) 3710 1.1.1.11 mrg && !GOMP_MAP_POINTER_P 
(kind)) 3711 1.1.1.11 mrg break; 3712 1.1.1.11 mrg if (devaddrs[j] < devaddrs[i]) 3713 1.1.1.11 mrg break; 3714 1.1.1.11 mrg if (cdata[i].present) 3715 1.1.1.11 mrg continue; 3716 1.1.1.11 mrg if (devaddrs[j] == 0) 3717 1.1.1.11 mrg { 3718 1.1.1.11 mrg *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0; 3719 1.1.1.11 mrg continue; 3720 1.1.1.11 mrg } 3721 1.1.1.11 mrg int k; 3722 1.1.1.11 mrg n2 = NULL; 3723 1.1.1.11 mrg /* Dereference devaddrs[j] to get the device addr. */ 3724 1.1.1.11 mrg assert (devaddrs[j] - sizes[j] == cdata[i].devaddr); 3725 1.1.1.11 mrg devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i] 3726 1.1.1.11 mrg + sizes[j]); 3727 1.1.1.11 mrg cdata[j].present = true; 3728 1.1.1.11 mrg cdata[j].devaddr = devaddrs[j]; 3729 1.1.1.11 mrg if (devaddrs[j] == 0) 3730 1.1.1.11 mrg continue; 3731 1.1.1.11 mrg k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j, 3732 1.1.1.11 mrg devaddrs[j], 3733 1.1.1.11 mrg devaddrs[j] + sizeof (void*), 3734 1.1.1.11 mrg false); 3735 1.1.1.11 mrg if (k < j) 3736 1.1.1.11 mrg devaddrs[j] = (devaddrs[k] + devaddrs[j] 3737 1.1.1.11 mrg - cdata[k].devaddr); 3738 1.1.1.11 mrg else 3739 1.1.1.11 mrg { 3740 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map, 3741 1.1.1.11 mrg devaddrs[j], 3742 1.1.1.11 mrg devaddrs[j] + sizeof (void*), 3743 1.1.1.11 mrg false); 3744 1.1.1.11 mrg if (n2 == NULL) 3745 1.1.1.11 mrg { 3746 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 3747 1.1.1.11 mrg gomp_fatal ("Pointer target wasn't mapped"); 3748 1.1.1.11 mrg } 3749 1.1.1.11 mrg devaddrs[j] = (n2->host_start + cdata[j].devaddr 3750 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset)); 3751 1.1.1.11 mrg } 3752 1.1.1.11 mrg *(void **) (uintptr_t) (devaddrs[i] + sizes[j]) 3753 1.1.1.11 mrg = (void *) (uintptr_t) devaddrs[j]; 3754 1.1.1.11 mrg } 3755 1.1.1.11 mrg i = j -1; 3756 1.1.1.11 mrg break; 3757 1.1.1.11 mrg case GOMP_MAP_STRUCT: 3758 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1], 3759 1.1.1.11 mrg devaddrs[i + sizes[i]] 3760 1.1.1.11 mrg + sizes[i + sizes[i]], false); 3761 1.1.1.11 mrg cdata[i].present = n2 != NULL; 3762 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i]; 3763 1.1.1.11 mrg struct_cpy = cdata[i].present ? 
0 : sizes[i]; 3764 1.1.1.11 mrg if (!n2) 3765 1.1.1.11 mrg { 3766 1.1.1.11 mrg size_t sz = (size_t) (devaddrs[i + sizes[i]] 3767 1.1.1.11 mrg - devaddrs[i+1] 3768 1.1.1.11 mrg + sizes[i + sizes[i]]); 3769 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8); 3770 1.1.1.11 mrg cdata[i].aligned = true; 3771 1.1.1.11 mrg devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz); 3772 1.1.1.11 mrg devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr; 3773 1.1.1.11 mrg } 3774 1.1.1.11 mrg else 3775 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr 3776 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset)); 3777 1.1.1.11 mrg break; 3778 1.1.1.11 mrg default: 3779 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 3780 1.1.1.11 mrg gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]); 3781 1.1.1.11 mrg } 3782 1.1.1.11 mrg } 3783 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 3784 1.1.1.11 mrg } 3785 1.1.1.11 mrg 3786 1.1.1.11 mrg host_fn (devaddrs); 3787 1.1.1.11 mrg 3788 1.1.1.11 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0) 3789 1.1.1.11 mrg { 3790 1.1.1.11 mrg uint64_t struct_cpy = 0; 3791 1.1.1.11 mrg bool clean_struct = false; 3792 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++) 3793 1.1.1.11 mrg { 3794 1.1.1.11 mrg if (cdata[i].devaddr == 0) 3795 1.1.1.11 mrg continue; 3796 1.1.1.11 mrg int kind = get_kind (short_mapkind, kinds, i) & typemask; 3797 1.1.1.11 mrg bool copy = !cdata[i].present || struct_cpy; 3798 1.1.1.11 mrg switch (kind) 3799 1.1.1.11 mrg { 3800 1.1.1.11 mrg case GOMP_MAP_FORCE_FROM: 3801 1.1.1.11 mrg case GOMP_MAP_FORCE_TOFROM: 3802 1.1.1.11 mrg case GOMP_MAP_ALWAYS_FROM: 3803 1.1.1.11 mrg case GOMP_MAP_ALWAYS_TOFROM: 3804 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_FROM: 3805 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TOFROM: 3806 1.1.1.11 mrg copy = true; 3807 1.1.1.11 mrg /* FALLTHRU */ 3808 1.1.1.11 mrg case GOMP_MAP_FROM: 3809 1.1.1.11 mrg case GOMP_MAP_TOFROM: 3810 1.1.1.11 mrg if (copy) 3811 1.1.1.11 mrg { 3812 1.1.1.11 mrg gomp_copy_host2dev (devicep, aq, 3813 1.1.1.11 mrg (void *) (uintptr_t) cdata[i].devaddr, 3814 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i], 3815 1.1.1.11 mrg sizes[i], false, NULL); 3816 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq)) 3817 1.1.1.11 mrg exit (EXIT_FAILURE); 3818 1.1.1.11 mrg } 3819 1.1.1.11 mrg default: 3820 1.1.1.11 mrg break; 3821 1.1.1.11 mrg } 3822 1.1.1.11 mrg if (struct_cpy) 3823 1.1.1.11 mrg { 3824 1.1.1.11 mrg struct_cpy--; 3825 1.1.1.11 mrg continue; 3826 1.1.1.11 mrg } 3827 1.1.1.11 mrg if (kind == GOMP_MAP_STRUCT && !cdata[i].present) 3828 1.1.1.11 mrg { 3829 1.1.1.11 mrg clean_struct = true; 3830 1.1.1.11 mrg struct_cpy = sizes[i]; 3831 1.1.1.11 mrg } 3832 1.1.1.11 mrg else if (!cdata[i].present && cdata[i].aligned) 3833 1.1.1.11 mrg gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]); 3834 1.1.1.11 mrg else if (!cdata[i].present) 3835 1.1.1.11 mrg free ((void *) (uintptr_t) devaddrs[i]); 3836 1.1.1.11 mrg } 3837 1.1.1.11 mrg if (clean_struct) 3838 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++) 3839 1.1.1.11 mrg if (!cdata[i].present 3840 1.1.1.11 mrg && ((get_kind (short_mapkind, kinds, i) & typemask) 3841 1.1.1.11 mrg == GOMP_MAP_STRUCT)) 3842 1.1.1.11 mrg { 3843 1.1.1.11 mrg devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr; 3844 1.1.1.11 mrg gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]); 3845 1.1.1.11 mrg } 3846 1.1.1.11 mrg 3847 1.1.1.11 mrg free (devaddrs); 3848 1.1.1.11 mrg free (sizes); 3849 1.1.1.11 mrg free (kinds); 
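/* DEVADDRS, SIZES and KINDS were host-side copies allocated earlier in this function; under GOMP_OFFLOAD_CAP_SHARED_MEM they alias the device-provided arrays and this whole cleanup block is skipped.  */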
3850 1.1.1.11 mrg } 3851 1.1.1.11 mrg } 3852 1.1.1.11 mrg 3853 1.1.1.11 mrg /* Host fallback for GOMP_target_data{,_ext} routines. */ 3854 1.1.1.11 mrg 3855 1.1.1.11 mrg static void 3856 1.1.1.11 mrg gomp_target_data_fallback (struct gomp_device_descr *devicep) 3857 1.1.1.11 mrg { 3858 1.1.1.11 mrg struct gomp_task_icv *icv = gomp_icv (false); 3859 1.1.1.11 mrg 3860 1.1.1.11 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY 3861 1.1.1.11 mrg && devicep != NULL) 3862 1.1.1.11 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot " 3863 1.1.1.11 mrg "be used for offloading"); 3864 1.1.1.11 mrg 3865 1.1.1.11 mrg if (icv->target_data) 3866 1.1.1.11 mrg { 3867 1.1.1.11 mrg /* Even when doing a host fallback, if there are any active 3868 1.1.1.11 mrg #pragma omp target data constructs, need to remember the 3869 1.1.1.11 mrg new #pragma omp target data, otherwise GOMP_target_end_data 3870 1.1.1.11 mrg would get out of sync. */ 3871 1.1.1.11 mrg struct target_mem_desc *tgt 3872 1.1.1.11 mrg = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, 3873 1.1.1.11 mrg NULL, GOMP_MAP_VARS_DATA); 3874 1.1.1.11 mrg tgt->prev = icv->target_data; 3875 1.1.1.11 mrg icv->target_data = tgt; 3876 1.1.1.11 mrg } 3877 1.1.1.11 mrg } 3878 1.1.1.11 mrg 3879 1.1.1.11 mrg void 3880 1.1.1.11 mrg GOMP_target_data (int device, const void *unused, size_t mapnum, 3881 1.1.1.11 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds) 3882 1.1.1.11 mrg { 3883 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true); 3884 1.1.1.11 mrg 3885 1.1.1.11 mrg if (devicep == NULL 3886 1.1.1.11 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3887 1.1.1.11 mrg || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) 3888 1.1.1.11 mrg return gomp_target_data_fallback (devicep); 3889 1.1.1.11 mrg 3890 1.1.1.11 mrg struct target_mem_desc *tgt 3891 1.1.1.11 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, 3892 1.1.1.11 mrg NULL, GOMP_MAP_VARS_DATA); 3893 1.1.1.11 mrg struct gomp_task_icv *icv = gomp_icv (true); 3894 1.1.1.11 mrg tgt->prev = icv->target_data; 3895 1.1.1.11 mrg icv->target_data = tgt; 3896 1.1.1.11 mrg } 3897 1.1.1.11 mrg 3898 1.1.1.11 mrg void 3899 1.1.1.11 mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, 3900 1.1.1.11 mrg size_t *sizes, unsigned short *kinds) 3901 1.1.1.11 mrg { 3902 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true); 3903 1.1.1.11 mrg 3904 1.1.1.11 mrg if (devicep == NULL 3905 1.1.1.11 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3906 1.1.1.11 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3907 1.1.1.11 mrg return gomp_target_data_fallback (devicep); 3908 1.1.1.11 mrg 3909 1.1.1.11 mrg struct target_mem_desc *tgt 3910 1.1.1.11 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, 3911 1.1.1.11 mrg NULL, GOMP_MAP_VARS_DATA); 3912 1.1.1.11 mrg struct gomp_task_icv *icv = gomp_icv (true); 3913 1.1.1.11 mrg tgt->prev = icv->target_data; 3914 1.1 mrg icv->target_data = tgt; 3915 1.1 mrg } 3916 1.1 mrg 3917 1.1 mrg void 3918 1.1 mrg GOMP_target_end_data (void) 3919 1.1 mrg { 3920 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false); 3921 1.1 mrg if (icv->target_data) 3922 1.1 mrg { 3923 1.1 mrg struct target_mem_desc *tgt = icv->target_data; 3924 1.1 mrg icv->target_data = tgt->prev; 3925 1.1.1.10 mrg gomp_unmap_vars (tgt, true, NULL); 3926 1.1 mrg } 3927 1.1 mrg } 3928 1.1 mrg 3929 1.1 mrg void 3930 1.1 mrg 
GOMP_target_update (int device, const void *unused, size_t mapnum, 3931 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds) 3932 1.1 mrg { 3933 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true); 3934 1.1 mrg 3935 1.1 mrg if (devicep == NULL 3936 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3937 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3938 1.1.1.2 mrg return; 3939 1.1.1.2 mrg 3940 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); 3941 1.1.1.2 mrg } 3942 1.1.1.2 mrg 3943 1.1.1.2 mrg void 3944 1.1.1.2 mrg GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, 3945 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, 3946 1.1.1.2 mrg unsigned int flags, void **depend) 3947 1.1.1.2 mrg { 3948 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true); 3949 1.1.1.2 mrg 3950 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present, 3951 1.1.1.2 mrg block the parent task until the dependencies are resolved 3952 1.1.1.2 mrg and then just continue with the rest of the function as if it 3953 1.1.1.2 mrg is a merged task. Until we are able to schedule task during 3954 1.1.1.2 mrg variable mapping or unmapping, ignore nowait if depend clauses 3955 1.1.1.2 mrg are not present. */ 3956 1.1.1.2 mrg if (depend != NULL) 3957 1.1.1.2 mrg { 3958 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread (); 3959 1.1.1.2 mrg if (thr->task && thr->task->depend_hash) 3960 1.1.1.2 mrg { 3961 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_NOWAIT) 3962 1.1.1.2 mrg && thr->ts.team 3963 1.1.1.2 mrg && !thr->task->final_task) 3964 1.1.1.2 mrg { 3965 1.1.1.2 mrg if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, 3966 1.1.1.2 mrg mapnum, hostaddrs, sizes, kinds, 3967 1.1.1.2 mrg flags | GOMP_TARGET_FLAG_UPDATE, 3968 1.1.1.2 mrg depend, NULL, GOMP_TARGET_TASK_DATA)) 3969 1.1.1.2 mrg return; 3970 1.1.1.2 mrg } 3971 1.1.1.2 mrg else 3972 1.1.1.2 mrg { 3973 1.1.1.2 mrg struct gomp_team *team = thr->ts.team; 3974 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new 3975 1.1.1.2 mrg tasks. */ 3976 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team) 3977 1.1.1.7 mrg { 3978 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier)) 3979 1.1.1.7 mrg return; 3980 1.1.1.7 mrg if (thr->task->taskgroup) 3981 1.1.1.7 mrg { 3982 1.1.1.7 mrg if (thr->task->taskgroup->cancelled) 3983 1.1.1.7 mrg return; 3984 1.1.1.7 mrg if (thr->task->taskgroup->workshare 3985 1.1.1.7 mrg && thr->task->taskgroup->prev 3986 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled) 3987 1.1.1.7 mrg return; 3988 1.1.1.7 mrg } 3989 1.1.1.7 mrg } 3990 1.1.1.2 mrg 3991 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend); 3992 1.1.1.2 mrg } 3993 1.1.1.2 mrg } 3994 1.1.1.2 mrg } 3995 1.1.1.2 mrg 3996 1.1.1.2 mrg if (devicep == NULL 3997 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3998 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3999 1.1.1.2 mrg return; 4000 1.1.1.2 mrg 4001 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread (); 4002 1.1.1.2 mrg struct gomp_team *team = thr->ts.team; 4003 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks. 
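   The check below mirrors the one earlier in this function: both the team
   barrier and the enclosing taskgroup chain (including a cancelled
   workshare's predecessor) are consulted.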
*/ 4004 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team) 4005 1.1.1.7 mrg { 4006 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier)) 4007 1.1.1.7 mrg return; 4008 1.1.1.7 mrg if (thr->task->taskgroup) 4009 1.1.1.7 mrg { 4010 1.1.1.7 mrg if (thr->task->taskgroup->cancelled) 4011 1.1.1.7 mrg return; 4012 1.1.1.7 mrg if (thr->task->taskgroup->workshare 4013 1.1.1.7 mrg && thr->task->taskgroup->prev 4014 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled) 4015 1.1.1.7 mrg return; 4016 1.1.1.7 mrg } 4017 1.1.1.7 mrg } 4018 1.1 mrg 4019 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); 4020 1.1.1.2 mrg } 4021 1.1.1.2 mrg 4022 1.1.1.2 mrg static void 4023 1.1.1.2 mrg gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, 4024 1.1.1.10 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds, 4025 1.1.1.10 mrg htab_t *refcount_set) 4026 1.1.1.2 mrg { 4027 1.1.1.2 mrg const int typemask = 0xff; 4028 1.1.1.2 mrg size_t i; 4029 1.1 mrg gomp_mutex_lock (&devicep->lock); 4030 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED) 4031 1.1.1.2 mrg { 4032 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 4033 1.1.1.2 mrg return; 4034 1.1.1.2 mrg } 4035 1.1.1.2 mrg 4036 1.1.1.2 mrg for (i = 0; i < mapnum; i++) 4037 1.1.1.10 mrg if ((kinds[i] & typemask) == GOMP_MAP_DETACH) 4038 1.1.1.10 mrg { 4039 1.1.1.10 mrg struct splay_tree_key_s cur_node; 4040 1.1.1.10 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 4041 1.1.1.10 mrg cur_node.host_end = cur_node.host_start + sizeof (void *); 4042 1.1.1.10 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); 4043 1.1.1.10 mrg 4044 1.1.1.10 mrg if (n) 4045 1.1.1.10 mrg gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i], 4046 1.1.1.10 mrg false, NULL); 4047 1.1.1.10 mrg } 4048 1.1.1.10 mrg 4049 1.1.1.10 mrg int nrmvars = 0; 4050 1.1.1.10 mrg splay_tree_key remove_vars[mapnum]; 4051 1.1.1.10 mrg 4052 1.1.1.10 mrg for (i = 0; i < mapnum; i++) 4053 1.1.1.2 mrg { 4054 1.1.1.2 mrg struct splay_tree_key_s cur_node; 4055 1.1.1.2 mrg unsigned char kind = kinds[i] & typemask; 4056 1.1.1.2 mrg switch (kind) 4057 1.1.1.2 mrg { 4058 1.1.1.2 mrg case GOMP_MAP_FROM: 4059 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM: 4060 1.1.1.2 mrg case GOMP_MAP_DELETE: 4061 1.1.1.2 mrg case GOMP_MAP_RELEASE: 4062 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: 4063 1.1.1.2 mrg case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: 4064 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i]; 4065 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizes[i]; 4066 1.1.1.2 mrg splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION 4067 1.1.1.2 mrg || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) 4068 1.1.1.2 mrg ? 
gomp_map_0len_lookup (&devicep->mem_map, &cur_node) 4069 1.1.1.2 mrg : splay_tree_lookup (&devicep->mem_map, &cur_node); 4070 1.1.1.2 mrg if (!k) 4071 1.1.1.2 mrg continue; 4072 1.1.1.2 mrg 4073 1.1.1.10 mrg bool delete_p = (kind == GOMP_MAP_DELETE 4074 1.1.1.10 mrg || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION); 4075 1.1.1.10 mrg bool do_copy, do_remove; 4076 1.1.1.10 mrg gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy, 4077 1.1.1.10 mrg &do_remove); 4078 1.1.1.2 mrg 4079 1.1.1.10 mrg if ((kind == GOMP_MAP_FROM && do_copy) 4080 1.1.1.2 mrg || kind == GOMP_MAP_ALWAYS_FROM) 4081 1.1.1.10 mrg { 4082 1.1.1.10 mrg if (k->aux && k->aux->attach_count) 4083 1.1.1.10 mrg { 4084 1.1.1.10 mrg /* We have to be careful not to overwrite still attached 4085 1.1.1.10 mrg pointers during the copyback to host. */ 4086 1.1.1.10 mrg uintptr_t addr = k->host_start; 4087 1.1.1.10 mrg while (addr < k->host_end) 4088 1.1.1.10 mrg { 4089 1.1.1.10 mrg size_t i = (addr - k->host_start) / sizeof (void *); 4090 1.1.1.10 mrg if (k->aux->attach_count[i] == 0) 4091 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, (void *) addr, 4092 1.1.1.10 mrg (void *) (k->tgt->tgt_start 4093 1.1.1.10 mrg + k->tgt_offset 4094 1.1.1.10 mrg + addr - k->host_start), 4095 1.1.1.10 mrg sizeof (void *)); 4096 1.1.1.10 mrg addr += sizeof (void *); 4097 1.1.1.10 mrg } 4098 1.1.1.10 mrg } 4099 1.1.1.10 mrg else 4100 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start, 4101 1.1.1.10 mrg (void *) (k->tgt->tgt_start + k->tgt_offset 4102 1.1.1.10 mrg + cur_node.host_start 4103 1.1.1.10 mrg - k->host_start), 4104 1.1.1.10 mrg cur_node.host_end - cur_node.host_start); 4105 1.1.1.10 mrg } 4106 1.1.1.10 mrg 4107 1.1.1.10 mrg /* Structure element lists are removed altogether at once, which 4108 1.1.1.10 mrg may cause immediate deallocation of the target_mem_desc, causing 4109 1.1.1.10 mrg errors if we still have following element siblings to copy back. 4110 1.1.1.10 mrg While we're at it, it also seems more disciplined to simply 4111 1.1.1.10 mrg queue all removals together for processing below. 4112 1.1.1.10 mrg 4113 1.1.1.10 mrg Structured block unmapping (i.e. gomp_unmap_vars_internal) should 4114 1.1.1.10 mrg not have this problem, since it maintains an additional 4115 1.1.1.10 mrg tgt->refcount = 1 reference to the target_mem_desc to start with.
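An illustrative case (a sketch, not upstream text): #pragma omp target exit data map(from: s.x, s.y) names sibling elements of one structure that share a single target_mem_desc; removing the entry for s.x on the spot could deallocate that descriptor while the copyback of s.y is still pending, which is exactly the failure mode the remove_vars queue avoids.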
4116 1.1.1.10 mrg */ 4117 1.1.1.10 mrg if (do_remove) 4118 1.1.1.10 mrg remove_vars[nrmvars++] = k; 4119 1.1.1.10 mrg break; 4120 1.1.1.2 mrg 4121 1.1.1.10 mrg case GOMP_MAP_DETACH: 4122 1.1.1.2 mrg break; 4123 1.1.1.2 mrg default: 4124 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 4125 1.1.1.2 mrg gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", 4126 1.1.1.2 mrg kind); 4127 1.1.1.2 mrg } 4128 1.1.1.2 mrg } 4129 1.1.1.2 mrg 4130 1.1.1.10 mrg for (int i = 0; i < nrmvars; i++) 4131 1.1.1.10 mrg gomp_remove_var (devicep, remove_vars[i]); 4132 1.1.1.10 mrg 4133 1.1 mrg gomp_mutex_unlock (&devicep->lock); 4134 1.1.1.2 mrg } 4135 1.1 mrg 4136 1.1.1.2 mrg void 4137 1.1.1.2 mrg GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, 4138 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, 4139 1.1.1.2 mrg unsigned int flags, void **depend) 4140 1.1.1.2 mrg { 4141 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true); 4142 1.1.1.2 mrg 4143 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present, 4144 1.1.1.2 mrg block the parent task until the dependencies are resolved 4145 1.1.1.2 mrg and then just continue with the rest of the function as if it 4146 1.1.1.2 mrg is a merged task. Until we are able to schedule tasks during 4147 1.1.1.2 mrg variable mapping or unmapping, ignore nowait if depend clauses 4148 1.1.1.2 mrg are not present. */ 4149 1.1.1.2 mrg if (depend != NULL) 4150 1.1.1.2 mrg { 4151 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread (); 4152 1.1.1.2 mrg if (thr->task && thr->task->depend_hash) 4153 1.1.1.2 mrg { 4154 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_NOWAIT) 4155 1.1.1.2 mrg && thr->ts.team 4156 1.1.1.2 mrg && !thr->task->final_task) 4157 1.1.1.2 mrg { 4158 1.1.1.2 mrg if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, 4159 1.1.1.2 mrg mapnum, hostaddrs, sizes, kinds, 4160 1.1.1.2 mrg flags, depend, NULL, 4161 1.1.1.2 mrg GOMP_TARGET_TASK_DATA)) 4162 1.1.1.2 mrg return; 4163 1.1.1.2 mrg } 4164 1.1.1.2 mrg else 4165 1.1.1.2 mrg { 4166 1.1.1.2 mrg struct gomp_team *team = thr->ts.team; 4167 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new 4168 1.1.1.2 mrg tasks. */ 4169 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team) 4170 1.1.1.7 mrg { 4171 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier)) 4172 1.1.1.7 mrg return; 4173 1.1.1.7 mrg if (thr->task->taskgroup) 4174 1.1.1.7 mrg { 4175 1.1.1.7 mrg if (thr->task->taskgroup->cancelled) 4176 1.1.1.7 mrg return; 4177 1.1.1.7 mrg if (thr->task->taskgroup->workshare 4178 1.1.1.7 mrg && thr->task->taskgroup->prev 4179 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled) 4180 1.1.1.7 mrg return; 4181 1.1.1.7 mrg } 4182 1.1.1.7 mrg } 4183 1.1.1.2 mrg 4184 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend); 4185 1.1.1.2 mrg } 4186 1.1.1.2 mrg } 4187 1.1.1.2 mrg } 4188 1.1.1.2 mrg 4189 1.1.1.2 mrg if (devicep == NULL 4190 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4191 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4192 1.1.1.2 mrg return; 4193 1.1.1.2 mrg 4194 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread (); 4195 1.1.1.2 mrg struct gomp_team *team = thr->ts.team; 4196 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks.
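To sketch the dispatch above: a construct such as #pragma omp target enter data map(to: a[0:n]) depend(out: a) nowait is deferred through gomp_create_target_task as a GOMP_TARGET_TASK_DATA task, whereas the same construct without nowait blocks in gomp_task_maybe_wait_for_dependencies and then continues below as a merged task.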
*/ 4197 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team) 4198 1.1.1.7 mrg { 4199 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier)) 4200 1.1.1.7 mrg return; 4201 1.1.1.7 mrg if (thr->task->taskgroup) 4202 1.1.1.7 mrg { 4203 1.1.1.7 mrg if (thr->task->taskgroup->cancelled) 4204 1.1.1.7 mrg return; 4205 1.1.1.7 mrg if (thr->task->taskgroup->workshare 4206 1.1.1.7 mrg && thr->task->taskgroup->prev 4207 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled) 4208 1.1.1.7 mrg return; 4209 1.1.1.7 mrg } 4210 1.1.1.7 mrg } 4211 1.1.1.2 mrg 4212 1.1.1.10 mrg htab_t refcount_set = htab_create (mapnum); 4213 1.1.1.10 mrg 4214 1.1.1.8 mrg /* The variables are mapped separately such that they can be released 4215 1.1.1.8 mrg independently. */ 4216 1.1.1.8 mrg size_t i, j; 4217 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) 4218 1.1.1.2 mrg for (i = 0; i < mapnum; i++) 4219 1.1.1.11 mrg if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT 4220 1.1.1.11 mrg || (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD) 4221 1.1.1.2 mrg { 4222 1.1.1.2 mrg gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], 4223 1.1.1.10 mrg &kinds[i], true, &refcount_set, 4224 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA); 4225 1.1.1.2 mrg i += sizes[i]; 4226 1.1.1.2 mrg } 4227 1.1.1.8 mrg else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET) 4228 1.1.1.8 mrg { 4229 1.1.1.8 mrg for (j = i + 1; j < mapnum; j++) 4230 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff) 4231 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff)) 4232 1.1.1.8 mrg break; 4233 1.1.1.8 mrg gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i], 4234 1.1.1.10 mrg &kinds[i], true, &refcount_set, 4235 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA); 4236 1.1.1.8 mrg i += j - i - 1; 4237 1.1.1.8 mrg } 4238 1.1.1.11 mrg else if (i + 1 < mapnum 4239 1.1.1.11 mrg && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH 4240 1.1.1.11 mrg || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER 4241 1.1.1.11 mrg && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER))) 4242 1.1.1.10 mrg { 4243 1.1.1.10 mrg /* An attach operation must be processed together with the mapped 4244 1.1.1.10 mrg base-pointer list item. 
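For instance (a sketch; the exact kinds emitted are up to the compiler), map(to: s.ptr[0:n]) on a pointer member is laid out as the pointed-to data list item immediately followed by a GOMP_MAP_ATTACH entry for s.ptr, so both entries are handed to gomp_map_vars as a single group of two below.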
*/ 4245 1.1.1.10 mrg gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i], 4246 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); 4247 1.1.1.10 mrg i += 1; 4248 1.1.1.10 mrg } 4249 1.1.1.2 mrg else 4250 1.1.1.2 mrg gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], 4251 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); 4252 1.1.1.2 mrg else 4253 1.1.1.10 mrg gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set); 4254 1.1.1.10 mrg htab_free (refcount_set); 4255 1.1.1.2 mrg } 4256 1.1.1.2 mrg 4257 1.1.1.2 mrg bool 4258 1.1.1.2 mrg gomp_target_task_fn (void *data) 4259 1.1.1.2 mrg { 4260 1.1.1.2 mrg struct gomp_target_task *ttask = (struct gomp_target_task *) data; 4261 1.1.1.2 mrg struct gomp_device_descr *devicep = ttask->devicep; 4262 1.1.1.2 mrg 4263 1.1.1.2 mrg if (ttask->fn != NULL) 4264 1.1.1.2 mrg { 4265 1.1.1.2 mrg void *fn_addr; 4266 1.1.1.2 mrg if (devicep == NULL 4267 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4268 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) 4269 1.1.1.2 mrg || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) 4270 1.1.1.2 mrg { 4271 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_FALLBACK; 4272 1.1.1.10 mrg gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep, 4273 1.1.1.10 mrg ttask->args); 4274 1.1.1.2 mrg return false; 4275 1.1.1.2 mrg } 4276 1.1.1.2 mrg 4277 1.1.1.2 mrg if (ttask->state == GOMP_TARGET_TASK_FINISHED) 4278 1.1.1.2 mrg { 4279 1.1.1.2 mrg if (ttask->tgt) 4280 1.1.1.10 mrg gomp_unmap_vars (ttask->tgt, true, NULL); 4281 1.1.1.2 mrg return false; 4282 1.1.1.2 mrg } 4283 1.1.1.2 mrg 4284 1.1.1.2 mrg void *actual_arguments; 4285 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4286 1.1.1.2 mrg { 4287 1.1.1.2 mrg ttask->tgt = NULL; 4288 1.1.1.2 mrg actual_arguments = ttask->hostaddrs; 4289 1.1.1.2 mrg } 4290 1.1.1.2 mrg else 4291 1.1.1.2 mrg { 4292 1.1.1.2 mrg ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, 4293 1.1.1.2 mrg NULL, ttask->sizes, ttask->kinds, true, 4294 1.1.1.10 mrg NULL, GOMP_MAP_VARS_TARGET); 4295 1.1.1.2 mrg actual_arguments = (void *) ttask->tgt->tgt_start; 4296 1.1.1.2 mrg } 4297 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; 4298 1.1.1.2 mrg 4299 1.1.1.8 mrg assert (devicep->async_run_func); 4300 1.1.1.2 mrg devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, 4301 1.1.1.2 mrg ttask->args, (void *) ttask); 4302 1.1.1.2 mrg return true; 4303 1.1.1.2 mrg } 4304 1.1.1.2 mrg else if (devicep == NULL 4305 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4306 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4307 1.1.1.2 mrg return false; 4308 1.1.1.2 mrg 4309 1.1.1.2 mrg size_t i; 4310 1.1.1.2 mrg if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) 4311 1.1.1.2 mrg gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, 4312 1.1.1.2 mrg ttask->kinds, true); 4313 1.1.1.2 mrg else 4314 1.1.1.10 mrg { 4315 1.1.1.10 mrg htab_t refcount_set = htab_create (ttask->mapnum); 4316 1.1.1.10 mrg if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) 4317 1.1.1.10 mrg for (i = 0; i < ttask->mapnum; i++) 4318 1.1.1.11 mrg if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT 4319 1.1.1.11 mrg || (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD) 4320 1.1.1.10 mrg { 4321 1.1.1.10 mrg gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], 4322 1.1.1.10 mrg NULL, &ttask->sizes[i], 
&ttask->kinds[i], true, 4323 1.1.1.10 mrg &refcount_set, GOMP_MAP_VARS_ENTER_DATA); 4324 1.1.1.10 mrg i += ttask->sizes[i]; 4325 1.1.1.10 mrg } 4326 1.1.1.10 mrg else 4327 1.1.1.10 mrg gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], 4328 1.1.1.10 mrg &ttask->kinds[i], true, &refcount_set, 4329 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA); 4330 1.1.1.10 mrg else 4331 1.1.1.10 mrg gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, 4332 1.1.1.10 mrg ttask->kinds, &refcount_set); 4333 1.1.1.10 mrg htab_free (refcount_set); 4334 1.1.1.10 mrg } 4335 1.1.1.2 mrg return false; 4336 1.1 mrg } 4337 1.1 mrg 4338 1.1 mrg void 4339 1.1 mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit) 4340 1.1 mrg { 4341 1.1 mrg if (thread_limit) 4342 1.1 mrg { 4343 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true); 4344 1.1 mrg icv->thread_limit_var 4345 1.1 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit; 4346 1.1 mrg } 4347 1.1 mrg (void) num_teams; 4348 1.1 mrg } 4349 1.1 mrg 4350 1.1.1.10 mrg bool 4351 1.1.1.10 mrg GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high, 4352 1.1.1.10 mrg unsigned int thread_limit, bool first) 4353 1.1.1.10 mrg { 4354 1.1.1.10 mrg struct gomp_thread *thr = gomp_thread (); 4355 1.1.1.10 mrg if (first) 4356 1.1.1.10 mrg { 4357 1.1.1.10 mrg if (thread_limit) 4358 1.1.1.10 mrg { 4359 1.1.1.10 mrg struct gomp_task_icv *icv = gomp_icv (true); 4360 1.1.1.10 mrg icv->thread_limit_var 4361 1.1.1.10 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit; 4362 1.1.1.10 mrg } 4363 1.1.1.10 mrg (void) num_teams_high; 4364 1.1.1.10 mrg if (num_teams_low == 0) 4365 1.1.1.10 mrg num_teams_low = 1; 4366 1.1.1.10 mrg thr->num_teams = num_teams_low - 1; 4367 1.1.1.10 mrg thr->team_num = 0; 4368 1.1.1.10 mrg } 4369 1.1.1.10 mrg else if (thr->team_num == thr->num_teams) 4370 1.1.1.10 mrg return false; 4371 1.1.1.10 mrg else 4372 1.1.1.10 mrg ++thr->team_num; 4373 1.1.1.10 mrg return true; 4374 1.1.1.10 mrg } 4375 1.1.1.10 mrg 4376 1.1.1.2 mrg void * 4377 1.1.1.2 mrg omp_target_alloc (size_t size, int device_num) 4378 1.1.1.2 mrg { 4379 1.1.1.11 mrg if (device_num == omp_initial_device 4380 1.1.1.11 mrg || device_num == gomp_get_num_devices ()) 4381 1.1.1.2 mrg return malloc (size); 4382 1.1.1.2 mrg 4383 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false); 4384 1.1.1.2 mrg if (devicep == NULL) 4385 1.1.1.2 mrg return NULL; 4386 1.1.1.2 mrg 4387 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4388 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4389 1.1.1.2 mrg return malloc (size); 4390 1.1.1.2 mrg 4391 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock); 4392 1.1.1.2 mrg void *ret = devicep->alloc_func (devicep->target_id, size); 4393 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 4394 1.1.1.2 mrg return ret; 4395 1.1.1.2 mrg } 4396 1.1.1.2 mrg 4397 1.1.1.2 mrg void 4398 1.1.1.2 mrg omp_target_free (void *device_ptr, int device_num) 4399 1.1.1.2 mrg { 4400 1.1.1.11 mrg if (device_num == omp_initial_device 4401 1.1.1.11 mrg || device_num == gomp_get_num_devices ()) 4402 1.1.1.2 mrg { 4403 1.1.1.2 mrg free (device_ptr); 4404 1.1.1.2 mrg return; 4405 1.1.1.2 mrg } 4406 1.1.1.2 mrg 4407 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false); 4408 1.1.1.11 mrg if (devicep == NULL || device_ptr == NULL) 4409 1.1.1.2 mrg return; 4410 1.1.1.2 mrg 4411 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4412 1.1.1.2 
mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4413 1.1.1.2 mrg { 4414 1.1.1.2 mrg free (device_ptr); 4415 1.1.1.2 mrg return; 4416 1.1.1.2 mrg } 4417 1.1.1.2 mrg 4418 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock); 4419 1.1.1.3 mrg gomp_free_device_memory (devicep, device_ptr); 4420 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 4421 1.1.1.2 mrg } 4422 1.1.1.2 mrg 4423 1.1.1.2 mrg int 4424 1.1.1.7 mrg omp_target_is_present (const void *ptr, int device_num) 4425 1.1.1.2 mrg { 4426 1.1.1.11 mrg if (device_num == omp_initial_device 4427 1.1.1.11 mrg || device_num == gomp_get_num_devices ()) 4428 1.1.1.2 mrg return 1; 4429 1.1.1.2 mrg 4430 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false); 4431 1.1.1.2 mrg if (devicep == NULL) 4432 1.1.1.2 mrg return 0; 4433 1.1.1.2 mrg 4434 1.1.1.11 mrg if (ptr == NULL) 4435 1.1.1.11 mrg return 1; 4436 1.1.1.11 mrg 4437 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4438 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4439 1.1.1.2 mrg return 1; 4440 1.1.1.2 mrg 4441 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock); 4442 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map; 4443 1.1.1.2 mrg struct splay_tree_key_s cur_node; 4444 1.1.1.2 mrg 4445 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr; 4446 1.1.1.2 mrg cur_node.host_end = cur_node.host_start; 4447 1.1.1.2 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node); 4448 1.1.1.2 mrg int ret = n != NULL; 4449 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 4450 1.1.1.2 mrg return ret; 4451 1.1.1.2 mrg } 4452 1.1.1.2 mrg 4453 1.1.1.11 mrg static int 4454 1.1.1.11 mrg omp_target_memcpy_check (int dst_device_num, int src_device_num, 4455 1.1.1.11 mrg struct gomp_device_descr **dst_devicep, 4456 1.1.1.11 mrg struct gomp_device_descr **src_devicep) 4457 1.1.1.11 mrg { 4458 1.1.1.11 mrg if (dst_device_num != gomp_get_num_devices () 4459 1.1.1.11 mrg /* Above gomp_get_num_devices has to be called unconditionally. 
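The point being that its side effect, gomp_init_targets_once, must run even when DST_DEVICE_NUM names the initial device, since the SRC test below compares against num_devices_openmp directly; e.g. (hypothetical call) omp_target_memcpy (d, s, n, 0, 0, omp_initial_device, 2) still needs the plugins scanned before device 2 can be resolved.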
*/ 4460 1.1.1.11 mrg && dst_device_num != omp_initial_device) 4461 1.1.1.2 mrg { 4462 1.1.1.11 mrg *dst_devicep = resolve_device (dst_device_num, false); 4463 1.1.1.11 mrg if (*dst_devicep == NULL) 4464 1.1.1.2 mrg return EINVAL; 4465 1.1.1.2 mrg 4466 1.1.1.11 mrg if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4467 1.1.1.11 mrg || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4468 1.1.1.11 mrg *dst_devicep = NULL; 4469 1.1.1.2 mrg } 4470 1.1.1.2 mrg 4471 1.1.1.11 mrg if (src_device_num != num_devices_openmp 4472 1.1.1.11 mrg && src_device_num != omp_initial_device) 4473 1.1.1.11 mrg { 4474 1.1.1.11 mrg *src_devicep = resolve_device (src_device_num, false); 4475 1.1.1.11 mrg if (*src_devicep == NULL) 4476 1.1.1.2 mrg return EINVAL; 4477 1.1.1.2 mrg 4478 1.1.1.11 mrg if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4479 1.1.1.11 mrg || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4480 1.1.1.11 mrg *src_devicep = NULL; 4481 1.1.1.2 mrg } 4482 1.1.1.11 mrg 4483 1.1.1.11 mrg return 0; 4484 1.1.1.11 mrg } 4485 1.1.1.11 mrg 4486 1.1.1.11 mrg static int 4487 1.1.1.11 mrg omp_target_memcpy_copy (void *dst, const void *src, size_t length, 4488 1.1.1.11 mrg size_t dst_offset, size_t src_offset, 4489 1.1.1.11 mrg struct gomp_device_descr *dst_devicep, 4490 1.1.1.11 mrg struct gomp_device_descr *src_devicep) 4491 1.1.1.11 mrg { 4492 1.1.1.11 mrg bool ret; 4493 1.1.1.2 mrg if (src_devicep == NULL && dst_devicep == NULL) 4494 1.1.1.2 mrg { 4495 1.1.1.2 mrg memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); 4496 1.1.1.2 mrg return 0; 4497 1.1.1.2 mrg } 4498 1.1.1.2 mrg if (src_devicep == NULL) 4499 1.1.1.2 mrg { 4500 1.1.1.2 mrg gomp_mutex_lock (&dst_devicep->lock); 4501 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id, 4502 1.1.1.3 mrg (char *) dst + dst_offset, 4503 1.1.1.3 mrg (char *) src + src_offset, length); 4504 1.1.1.2 mrg gomp_mutex_unlock (&dst_devicep->lock); 4505 1.1.1.3 mrg return (ret ? 0 : EINVAL); 4506 1.1.1.2 mrg } 4507 1.1.1.2 mrg if (dst_devicep == NULL) 4508 1.1.1.2 mrg { 4509 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock); 4510 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id, 4511 1.1.1.3 mrg (char *) dst + dst_offset, 4512 1.1.1.3 mrg (char *) src + src_offset, length); 4513 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock); 4514 1.1.1.3 mrg return (ret ? 0 : EINVAL); 4515 1.1.1.2 mrg } 4516 1.1.1.2 mrg if (src_devicep == dst_devicep) 4517 1.1.1.2 mrg { 4518 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock); 4519 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id, 4520 1.1.1.3 mrg (char *) dst + dst_offset, 4521 1.1.1.3 mrg (char *) src + src_offset, length); 4522 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock); 4523 1.1.1.3 mrg return (ret ? 
0 : EINVAL); 4524 1.1.1.2 mrg } 4525 1.1.1.2 mrg return EINVAL; 4526 1.1.1.2 mrg } 4527 1.1.1.2 mrg 4528 1.1.1.11 mrg int 4529 1.1.1.11 mrg omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset, 4530 1.1.1.11 mrg size_t src_offset, int dst_device_num, int src_device_num) 4531 1.1.1.11 mrg { 4532 1.1.1.11 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 4533 1.1.1.11 mrg int ret = omp_target_memcpy_check (dst_device_num, src_device_num, 4534 1.1.1.11 mrg &dst_devicep, &src_devicep); 4535 1.1.1.11 mrg 4536 1.1.1.11 mrg if (ret) 4537 1.1.1.11 mrg return ret; 4538 1.1.1.11 mrg 4539 1.1.1.11 mrg ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset, 4540 1.1.1.11 mrg dst_devicep, src_devicep); 4541 1.1.1.11 mrg 4542 1.1.1.11 mrg return ret; 4543 1.1.1.11 mrg } 4544 1.1.1.11 mrg 4545 1.1.1.11 mrg typedef struct 4546 1.1.1.11 mrg { 4547 1.1.1.11 mrg void *dst; 4548 1.1.1.11 mrg const void *src; 4549 1.1.1.11 mrg size_t length; 4550 1.1.1.11 mrg size_t dst_offset; 4551 1.1.1.11 mrg size_t src_offset; 4552 1.1.1.11 mrg struct gomp_device_descr *dst_devicep; 4553 1.1.1.11 mrg struct gomp_device_descr *src_devicep; 4554 1.1.1.11 mrg } omp_target_memcpy_data; 4555 1.1.1.11 mrg 4556 1.1.1.11 mrg static void 4557 1.1.1.11 mrg omp_target_memcpy_async_helper (void *args) 4558 1.1.1.11 mrg { 4559 1.1.1.11 mrg omp_target_memcpy_data *a = args; 4560 1.1.1.11 mrg if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset, 4561 1.1.1.11 mrg a->src_offset, a->dst_devicep, a->src_devicep)) 4562 1.1.1.11 mrg gomp_fatal ("omp_target_memcpy failed"); 4563 1.1.1.11 mrg } 4564 1.1.1.11 mrg 4565 1.1.1.11 mrg int 4566 1.1.1.11 mrg omp_target_memcpy_async (void *dst, const void *src, size_t length, 4567 1.1.1.11 mrg size_t dst_offset, size_t src_offset, 4568 1.1.1.11 mrg int dst_device_num, int src_device_num, 4569 1.1.1.11 mrg int depobj_count, omp_depend_t *depobj_list) 4570 1.1.1.11 mrg { 4571 1.1.1.11 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 4572 1.1.1.11 mrg unsigned int flags = 0; 4573 1.1.1.11 mrg void *depend[depobj_count + 5]; 4574 1.1.1.11 mrg int i; 4575 1.1.1.11 mrg int check = omp_target_memcpy_check (dst_device_num, src_device_num, 4576 1.1.1.11 mrg &dst_devicep, &src_devicep); 4577 1.1.1.11 mrg 4578 1.1.1.11 mrg omp_target_memcpy_data s = { 4579 1.1.1.11 mrg .dst = dst, 4580 1.1.1.11 mrg .src = src, 4581 1.1.1.11 mrg .length = length, 4582 1.1.1.11 mrg .dst_offset = dst_offset, 4583 1.1.1.11 mrg .src_offset = src_offset, 4584 1.1.1.11 mrg .dst_devicep = dst_devicep, 4585 1.1.1.11 mrg .src_devicep = src_devicep 4586 1.1.1.11 mrg }; 4587 1.1.1.11 mrg 4588 1.1.1.11 mrg if (check) 4589 1.1.1.11 mrg return check; 4590 1.1.1.11 mrg 4591 1.1.1.11 mrg if (depobj_count > 0 && depobj_list != NULL) 4592 1.1.1.11 mrg { 4593 1.1.1.11 mrg flags |= GOMP_TASK_FLAG_DEPEND; 4594 1.1.1.11 mrg depend[0] = 0; 4595 1.1.1.11 mrg depend[1] = (void *) (uintptr_t) depobj_count; 4596 1.1.1.11 mrg depend[2] = depend[3] = depend[4] = 0; 4597 1.1.1.11 mrg for (i = 0; i < depobj_count; ++i) 4598 1.1.1.11 mrg depend[i + 5] = &depobj_list[i]; 4599 1.1.1.11 mrg } 4600 1.1.1.11 mrg 4601 1.1.1.11 mrg GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s), 4602 1.1.1.11 mrg __alignof__ (s), true, flags, depend, 0, NULL); 4603 1.1.1.11 mrg 4604 1.1.1.11 mrg return 0; 4605 1.1.1.11 mrg } 4606 1.1.1.11 mrg 4607 1.1.1.2 mrg static int 4608 1.1.1.7 mrg omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, 
4609 1.1.1.2 mrg int num_dims, const size_t *volume, 4610 1.1.1.2 mrg const size_t *dst_offsets, 4611 1.1.1.2 mrg const size_t *src_offsets, 4612 1.1.1.2 mrg const size_t *dst_dimensions, 4613 1.1.1.2 mrg const size_t *src_dimensions, 4614 1.1.1.2 mrg struct gomp_device_descr *dst_devicep, 4615 1.1.1.11 mrg struct gomp_device_descr *src_devicep, 4616 1.1.1.11 mrg size_t *tmp_size, void **tmp) 4617 1.1.1.2 mrg { 4618 1.1.1.2 mrg size_t dst_slice = element_size; 4619 1.1.1.2 mrg size_t src_slice = element_size; 4620 1.1.1.2 mrg size_t j, dst_off, src_off, length; 4621 1.1.1.2 mrg int i, ret; 4622 1.1.1.2 mrg 4623 1.1.1.2 mrg if (num_dims == 1) 4624 1.1.1.2 mrg { 4625 1.1.1.2 mrg if (__builtin_mul_overflow (element_size, volume[0], &length) 4626 1.1.1.2 mrg || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) 4627 1.1.1.2 mrg || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) 4628 1.1.1.2 mrg return EINVAL; 4629 1.1.1.2 mrg if (dst_devicep == NULL && src_devicep == NULL) 4630 1.1.1.3 mrg { 4631 1.1.1.7 mrg memcpy ((char *) dst + dst_off, (const char *) src + src_off, 4632 1.1.1.7 mrg length); 4633 1.1.1.3 mrg ret = 1; 4634 1.1.1.3 mrg } 4635 1.1.1.2 mrg else if (src_devicep == NULL) 4636 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id, 4637 1.1.1.3 mrg (char *) dst + dst_off, 4638 1.1.1.7 mrg (const char *) src + src_off, 4639 1.1.1.7 mrg length); 4640 1.1.1.2 mrg else if (dst_devicep == NULL) 4641 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id, 4642 1.1.1.3 mrg (char *) dst + dst_off, 4643 1.1.1.7 mrg (const char *) src + src_off, 4644 1.1.1.7 mrg length); 4645 1.1.1.2 mrg else if (src_devicep == dst_devicep) 4646 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id, 4647 1.1.1.3 mrg (char *) dst + dst_off, 4648 1.1.1.7 mrg (const char *) src + src_off, 4649 1.1.1.7 mrg length); 4650 1.1.1.2 mrg else 4651 1.1.1.11 mrg { 4652 1.1.1.11 mrg if (*tmp_size == 0) 4653 1.1.1.11 mrg { 4654 1.1.1.11 mrg *tmp_size = length; 4655 1.1.1.11 mrg *tmp = malloc (length); 4656 1.1.1.11 mrg if (*tmp == NULL) 4657 1.1.1.11 mrg return ENOMEM; 4658 1.1.1.11 mrg } 4659 1.1.1.11 mrg else if (*tmp_size < length) 4660 1.1.1.11 mrg { 4661 1.1.1.11 mrg *tmp_size = length; 4662 1.1.1.11 mrg free (*tmp); 4663 1.1.1.11 mrg *tmp = malloc (length); 4664 1.1.1.11 mrg if (*tmp == NULL) 4665 1.1.1.11 mrg return ENOMEM; 4666 1.1.1.11 mrg } 4667 1.1.1.11 mrg ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp, 4668 1.1.1.11 mrg (const char *) src + src_off, 4669 1.1.1.11 mrg length); 4670 1.1.1.11 mrg if (ret == 1) 4671 1.1.1.11 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id, 4672 1.1.1.11 mrg (char *) dst + dst_off, *tmp, 4673 1.1.1.11 mrg length); 4674 1.1.1.11 mrg } 4675 1.1.1.3 mrg return ret ? 0 : EINVAL; 4676 1.1.1.2 mrg } 4677 1.1.1.2 mrg 4678 1.1.1.11 mrg /* host->device, device->host and intra device. */ 4679 1.1.1.11 mrg if (num_dims == 2 4680 1.1.1.11 mrg && ((src_devicep 4681 1.1.1.11 mrg && src_devicep == dst_devicep 4682 1.1.1.11 mrg && src_devicep->memcpy2d_func) 4683 1.1.1.11 mrg || (!src_devicep != !dst_devicep 4684 1.1.1.11 mrg && ((src_devicep && src_devicep->memcpy2d_func) 4685 1.1.1.11 mrg || (dst_devicep && dst_devicep->memcpy2d_func))))) 4686 1.1.1.11 mrg { 4687 1.1.1.11 mrg size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1; 4688 1.1.1.11 mrg int dst_id = dst_devicep ? dst_devicep->target_id : -1; 4689 1.1.1.11 mrg int src_id = src_devicep ? 
src_devicep->target_id : -1; 4690 1.1.1.11 mrg struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep; 4691 1.1.1.11 mrg 4692 1.1.1.11 mrg if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1) 4693 1.1.1.11 mrg || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1) 4694 1.1.1.11 mrg || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1) 4695 1.1.1.11 mrg || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1) 4696 1.1.1.11 mrg || __builtin_mul_overflow (src_offsets[1], element_size, 4697 1.1.1.11 mrg &src_off_sz1)) 4698 1.1.1.11 mrg return EINVAL; 4699 1.1.1.11 mrg ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0], 4700 1.1.1.11 mrg dst, dst_off_sz1, dst_offsets[0], dst_sz1, 4701 1.1.1.11 mrg src, src_off_sz1, src_offsets[0], src_sz1); 4702 1.1.1.11 mrg if (ret != -1) 4703 1.1.1.11 mrg return ret ? 0 : EINVAL; 4704 1.1.1.11 mrg } 4705 1.1.1.11 mrg else if (num_dims == 3 4706 1.1.1.11 mrg && ((src_devicep 4707 1.1.1.11 mrg && src_devicep == dst_devicep 4708 1.1.1.11 mrg && src_devicep->memcpy3d_func) 4709 1.1.1.11 mrg || (!src_devicep != !dst_devicep 4710 1.1.1.11 mrg && ((src_devicep && src_devicep->memcpy3d_func) 4711 1.1.1.11 mrg || (dst_devicep && dst_devicep->memcpy3d_func))))) 4712 1.1.1.11 mrg { 4713 1.1.1.11 mrg size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2; 4714 1.1.1.11 mrg int dst_id = dst_devicep ? dst_devicep->target_id : -1; 4715 1.1.1.11 mrg int src_id = src_devicep ? src_devicep->target_id : -1; 4716 1.1.1.11 mrg struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep; 4717 1.1.1.11 mrg 4718 1.1.1.11 mrg if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2) 4719 1.1.1.11 mrg || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2) 4720 1.1.1.11 mrg || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2) 4721 1.1.1.11 mrg || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2) 4722 1.1.1.11 mrg || __builtin_mul_overflow (src_offsets[2], element_size, 4723 1.1.1.11 mrg &src_off_sz2)) 4724 1.1.1.11 mrg return EINVAL; 4725 1.1.1.11 mrg ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0], 4726 1.1.1.11 mrg dst, dst_off_sz2, dst_offsets[1], 4727 1.1.1.11 mrg dst_offsets[0], dst_sz2, dst_dimensions[1], 4728 1.1.1.11 mrg src, src_off_sz2, src_offsets[1], 4729 1.1.1.11 mrg src_offsets[0], src_sz2, src_dimensions[1]); 4730 1.1.1.11 mrg if (ret != -1) 4731 1.1.1.11 mrg return ret ? 
0 : EINVAL; 4732 1.1.1.11 mrg } 4733 1.1.1.2 mrg 4734 1.1.1.2 mrg for (i = 1; i < num_dims; i++) 4735 1.1.1.2 mrg if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) 4736 1.1.1.2 mrg || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) 4737 1.1.1.2 mrg return EINVAL; 4738 1.1.1.2 mrg if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) 4739 1.1.1.2 mrg || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) 4740 1.1.1.2 mrg return EINVAL; 4741 1.1.1.2 mrg for (j = 0; j < volume[0]; j++) 4742 1.1.1.2 mrg { 4743 1.1.1.2 mrg ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, 4744 1.1.1.7 mrg (const char *) src + src_off, 4745 1.1.1.2 mrg element_size, num_dims - 1, 4746 1.1.1.2 mrg volume + 1, dst_offsets + 1, 4747 1.1.1.2 mrg src_offsets + 1, dst_dimensions + 1, 4748 1.1.1.2 mrg src_dimensions + 1, dst_devicep, 4749 1.1.1.11 mrg src_devicep, tmp_size, tmp); 4750 1.1.1.2 mrg if (ret) 4751 1.1.1.2 mrg return ret; 4752 1.1.1.2 mrg dst_off += dst_slice; 4753 1.1.1.2 mrg src_off += src_slice; 4754 1.1.1.2 mrg } 4755 1.1.1.2 mrg return 0; 4756 1.1.1.2 mrg } 4757 1.1.1.2 mrg 4758 1.1.1.11 mrg static int 4759 1.1.1.11 mrg omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num, 4760 1.1.1.11 mrg int src_device_num, 4761 1.1.1.11 mrg struct gomp_device_descr **dst_devicep, 4762 1.1.1.11 mrg struct gomp_device_descr **src_devicep) 4763 1.1.1.11 mrg { 4764 1.1.1.11 mrg if (!dst && !src) 4765 1.1.1.11 mrg return INT_MAX; 4766 1.1.1.11 mrg 4767 1.1.1.11 mrg int ret = omp_target_memcpy_check (dst_device_num, src_device_num, 4768 1.1.1.11 mrg dst_devicep, src_devicep); 4769 1.1.1.11 mrg if (ret) 4770 1.1.1.11 mrg return ret; 4771 1.1.1.11 mrg 4772 1.1.1.11 mrg return 0; 4773 1.1.1.11 mrg } 4774 1.1.1.11 mrg 4775 1.1.1.11 mrg static int 4776 1.1.1.11 mrg omp_target_memcpy_rect_copy (void *dst, const void *src, 4777 1.1.1.11 mrg size_t element_size, int num_dims, 4778 1.1.1.11 mrg const size_t *volume, const size_t *dst_offsets, 4779 1.1.1.11 mrg const size_t *src_offsets, 4780 1.1.1.11 mrg const size_t *dst_dimensions, 4781 1.1.1.11 mrg const size_t *src_dimensions, 4782 1.1.1.11 mrg struct gomp_device_descr *dst_devicep, 4783 1.1.1.11 mrg struct gomp_device_descr *src_devicep) 4784 1.1.1.11 mrg { 4785 1.1.1.11 mrg size_t tmp_size = 0; 4786 1.1.1.11 mrg void *tmp = NULL; 4787 1.1.1.11 mrg bool lock_src; 4788 1.1.1.11 mrg bool lock_dst; 4789 1.1.1.11 mrg 4790 1.1.1.11 mrg lock_src = src_devicep != NULL; 4791 1.1.1.11 mrg lock_dst = dst_devicep != NULL && src_devicep != dst_devicep; 4792 1.1.1.11 mrg if (lock_src) 4793 1.1.1.11 mrg gomp_mutex_lock (&src_devicep->lock); 4794 1.1.1.11 mrg if (lock_dst) 4795 1.1.1.11 mrg gomp_mutex_lock (&dst_devicep->lock); 4796 1.1.1.11 mrg int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, 4797 1.1.1.11 mrg volume, dst_offsets, src_offsets, 4798 1.1.1.11 mrg dst_dimensions, src_dimensions, 4799 1.1.1.11 mrg dst_devicep, src_devicep, 4800 1.1.1.11 mrg &tmp_size, &tmp); 4801 1.1.1.11 mrg if (lock_src) 4802 1.1.1.11 mrg gomp_mutex_unlock (&src_devicep->lock); 4803 1.1.1.11 mrg if (lock_dst) 4804 1.1.1.11 mrg gomp_mutex_unlock (&dst_devicep->lock); 4805 1.1.1.11 mrg if (tmp) 4806 1.1.1.11 mrg free (tmp); 4807 1.1.1.11 mrg 4808 1.1.1.11 mrg return ret; 4809 1.1.1.11 mrg } 4810 1.1.1.11 mrg 4811 1.1.1.2 mrg int 4812 1.1.1.7 mrg omp_target_memcpy_rect (void *dst, const void *src, size_t element_size, 4813 1.1.1.2 mrg int num_dims, const size_t *volume, 4814 1.1.1.2 mrg 
const size_t *dst_offsets, 4815 1.1.1.2 mrg const size_t *src_offsets, 4816 1.1.1.2 mrg const size_t *dst_dimensions, 4817 1.1.1.2 mrg const size_t *src_dimensions, 4818 1.1.1.2 mrg int dst_device_num, int src_device_num) 4819 1.1.1.2 mrg { 4820 1.1.1.2 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 4821 1.1.1.2 mrg 4822 1.1.1.11 mrg int check = omp_target_memcpy_rect_check (dst, src, dst_device_num, 4823 1.1.1.11 mrg src_device_num, &dst_devicep, 4824 1.1.1.11 mrg &src_devicep); 4825 1.1.1.11 mrg 4826 1.1.1.11 mrg if (check) 4827 1.1.1.11 mrg return check; 4828 1.1.1.11 mrg 4829 1.1.1.11 mrg int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims, 4830 1.1.1.11 mrg volume, dst_offsets, src_offsets, 4831 1.1.1.11 mrg dst_dimensions, src_dimensions, 4832 1.1.1.11 mrg dst_devicep, src_devicep); 4833 1.1.1.2 mrg 4834 1.1.1.11 mrg return ret; 4835 1.1.1.11 mrg } 4836 1.1.1.2 mrg 4837 1.1.1.11 mrg typedef struct 4838 1.1.1.11 mrg { 4839 1.1.1.11 mrg void *dst; 4840 1.1.1.11 mrg const void *src; 4841 1.1.1.11 mrg size_t element_size; 4842 1.1.1.11 mrg const size_t *volume; 4843 1.1.1.11 mrg const size_t *dst_offsets; 4844 1.1.1.11 mrg const size_t *src_offsets; 4845 1.1.1.11 mrg const size_t *dst_dimensions; 4846 1.1.1.11 mrg const size_t *src_dimensions; 4847 1.1.1.11 mrg struct gomp_device_descr *dst_devicep; 4848 1.1.1.11 mrg struct gomp_device_descr *src_devicep; 4849 1.1.1.11 mrg int num_dims; 4850 1.1.1.11 mrg } omp_target_memcpy_rect_data; 4851 1.1.1.2 mrg 4852 1.1.1.11 mrg static void 4853 1.1.1.11 mrg omp_target_memcpy_rect_async_helper (void *args) 4854 1.1.1.11 mrg { 4855 1.1.1.11 mrg omp_target_memcpy_rect_data *a = args; 4856 1.1.1.11 mrg int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size, 4857 1.1.1.11 mrg a->num_dims, a->volume, a->dst_offsets, 4858 1.1.1.11 mrg a->src_offsets, a->dst_dimensions, 4859 1.1.1.11 mrg a->src_dimensions, a->dst_devicep, 4860 1.1.1.11 mrg a->src_devicep); 4861 1.1.1.11 mrg if (ret) 4862 1.1.1.11 mrg gomp_fatal ("omp_target_memcpy_rect failed"); 4863 1.1.1.11 mrg } 4864 1.1.1.2 mrg 4865 1.1.1.11 mrg int 4866 1.1.1.11 mrg omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size, 4867 1.1.1.11 mrg int num_dims, const size_t *volume, 4868 1.1.1.11 mrg const size_t *dst_offsets, 4869 1.1.1.11 mrg const size_t *src_offsets, 4870 1.1.1.11 mrg const size_t *dst_dimensions, 4871 1.1.1.11 mrg const size_t *src_dimensions, 4872 1.1.1.11 mrg int dst_device_num, int src_device_num, 4873 1.1.1.11 mrg int depobj_count, omp_depend_t *depobj_list) 4874 1.1.1.11 mrg { 4875 1.1.1.11 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 4876 1.1.1.11 mrg unsigned flags = 0; 4877 1.1.1.11 mrg int check = omp_target_memcpy_rect_check (dst, src, dst_device_num, 4878 1.1.1.11 mrg src_device_num, &dst_devicep, 4879 1.1.1.11 mrg &src_devicep); 4880 1.1.1.11 mrg void *depend[depobj_count + 5]; 4881 1.1.1.11 mrg int i; 4882 1.1.1.2 mrg 4883 1.1.1.11 mrg omp_target_memcpy_rect_data s = { 4884 1.1.1.11 mrg .dst = dst, 4885 1.1.1.11 mrg .src = src, 4886 1.1.1.11 mrg .element_size = element_size, 4887 1.1.1.11 mrg .num_dims = num_dims, 4888 1.1.1.11 mrg .volume = volume, 4889 1.1.1.11 mrg .dst_offsets = dst_offsets, 4890 1.1.1.11 mrg .src_offsets = src_offsets, 4891 1.1.1.11 mrg .dst_dimensions = dst_dimensions, 4892 1.1.1.11 mrg .src_dimensions = src_dimensions, 4893 1.1.1.11 mrg .dst_devicep = dst_devicep, 4894 1.1.1.11 mrg .src_devicep = src_devicep 4895 1.1.1.11 mrg }; 4896 1.1.1.11 
mrg 4897 1.1.1.11 mrg if (check) 4898 1.1.1.11 mrg return check; 4899 1.1.1.11 mrg 4900 1.1.1.11 mrg if (depobj_count > 0 && depobj_list != NULL) 4901 1.1.1.11 mrg { 4902 1.1.1.11 mrg flags |= GOMP_TASK_FLAG_DEPEND; 4903 1.1.1.11 mrg depend[0] = 0; 4904 1.1.1.11 mrg depend[1] = (void *) (uintptr_t) depobj_count; 4905 1.1.1.11 mrg depend[2] = depend[3] = depend[4] = 0; 4906 1.1.1.11 mrg for (i = 0; i < depobj_count; ++i) 4907 1.1.1.11 mrg depend[i + 5] = &depobj_list[i]; 4908 1.1.1.2 mrg } 4909 1.1.1.2 mrg 4910 1.1.1.11 mrg GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s), 4911 1.1.1.11 mrg __alignof__ (s), true, flags, depend, 0, NULL); 4912 1.1.1.2 mrg 4913 1.1.1.11 mrg return 0; 4914 1.1.1.2 mrg } 4915 1.1.1.2 mrg 4916 1.1.1.2 mrg int 4917 1.1.1.7 mrg omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, 4918 1.1.1.7 mrg size_t size, size_t device_offset, int device_num) 4919 1.1.1.2 mrg { 4920 1.1.1.11 mrg if (device_num == omp_initial_device 4921 1.1.1.11 mrg || device_num == gomp_get_num_devices ()) 4922 1.1.1.2 mrg return EINVAL; 4923 1.1.1.2 mrg 4924 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false); 4925 1.1.1.2 mrg if (devicep == NULL) 4926 1.1.1.2 mrg return EINVAL; 4927 1.1.1.2 mrg 4928 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 4929 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 4930 1.1.1.2 mrg return EINVAL; 4931 1.1.1.2 mrg 4932 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock); 4933 1.1.1.2 mrg 4934 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map; 4935 1.1.1.2 mrg struct splay_tree_key_s cur_node; 4936 1.1.1.2 mrg int ret = EINVAL; 4937 1.1.1.2 mrg 4938 1.1.1.2 mrg cur_node.host_start = (uintptr_t) host_ptr; 4939 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + size; 4940 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 4941 1.1.1.2 mrg if (n) 4942 1.1.1.2 mrg { 4943 1.1.1.2 mrg if (n->tgt->tgt_start + n->tgt_offset 4944 1.1.1.2 mrg == (uintptr_t) device_ptr + device_offset 4945 1.1.1.2 mrg && n->host_start <= cur_node.host_start 4946 1.1.1.2 mrg && n->host_end >= cur_node.host_end) 4947 1.1.1.2 mrg ret = 0; 4948 1.1.1.2 mrg } 4949 1.1.1.2 mrg else 4950 1.1.1.2 mrg { 4951 1.1.1.2 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); 4952 1.1.1.2 mrg tgt->array = gomp_malloc (sizeof (*tgt->array)); 4953 1.1.1.2 mrg tgt->refcount = 1; 4954 1.1.1.2 mrg tgt->tgt_start = 0; 4955 1.1.1.2 mrg tgt->tgt_end = 0; 4956 1.1.1.2 mrg tgt->to_free = NULL; 4957 1.1.1.2 mrg tgt->prev = NULL; 4958 1.1.1.2 mrg tgt->list_count = 0; 4959 1.1.1.2 mrg tgt->device_descr = devicep; 4960 1.1.1.2 mrg splay_tree_node array = tgt->array; 4961 1.1.1.2 mrg splay_tree_key k = &array->key; 4962 1.1.1.2 mrg k->host_start = cur_node.host_start; 4963 1.1.1.2 mrg k->host_end = cur_node.host_end; 4964 1.1.1.2 mrg k->tgt = tgt; 4965 1.1.1.2 mrg k->tgt_offset = (uintptr_t) device_ptr + device_offset; 4966 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY; 4967 1.1.1.7 mrg k->dynamic_refcount = 0; 4968 1.1.1.8 mrg k->aux = NULL; 4969 1.1.1.2 mrg array->left = NULL; 4970 1.1.1.2 mrg array->right = NULL; 4971 1.1.1.2 mrg splay_tree_insert (&devicep->mem_map, array); 4972 1.1.1.2 mrg ret = 0; 4973 1.1.1.2 mrg } 4974 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 4975 1.1.1.2 mrg return ret; 4976 1.1.1.2 mrg } 4977 1.1.1.2 mrg 4978 1.1.1.2 mrg int 4979 1.1.1.7 mrg omp_target_disassociate_ptr (const void *ptr, int device_num) 4980 1.1.1.2 mrg { 4981 1.1.1.11 mrg 
struct gomp_device_descr *devicep = resolve_device (device_num, false); 4982 1.1.1.2 mrg if (devicep == NULL) 4983 1.1.1.2 mrg return EINVAL; 4984 1.1.1.2 mrg 4985 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 4986 1.1.1.2 mrg return EINVAL; 4987 1.1.1.2 mrg 4988 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock); 4989 1.1.1.2 mrg 4990 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map; 4991 1.1.1.2 mrg struct splay_tree_key_s cur_node; 4992 1.1.1.2 mrg int ret = EINVAL; 4993 1.1.1.2 mrg 4994 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr; 4995 1.1.1.2 mrg cur_node.host_end = cur_node.host_start; 4996 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 4997 1.1.1.2 mrg if (n 4998 1.1.1.2 mrg && n->host_start == cur_node.host_start 4999 1.1.1.2 mrg && n->refcount == REFCOUNT_INFINITY 5000 1.1.1.2 mrg && n->tgt->tgt_start == 0 5001 1.1.1.2 mrg && n->tgt->to_free == NULL 5002 1.1.1.2 mrg && n->tgt->refcount == 1 5003 1.1.1.2 mrg && n->tgt->list_count == 0) 5004 1.1.1.2 mrg { 5005 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, n); 5006 1.1.1.2 mrg gomp_unmap_tgt (n->tgt); 5007 1.1.1.2 mrg ret = 0; 5008 1.1.1.2 mrg } 5009 1.1.1.2 mrg 5010 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 5011 1.1.1.2 mrg return ret; 5012 1.1.1.2 mrg } 5013 1.1.1.2 mrg 5014 1.1.1.11 mrg void * 5015 1.1.1.11 mrg omp_get_mapped_ptr (const void *ptr, int device_num) 5016 1.1.1.11 mrg { 5017 1.1.1.11 mrg if (device_num == omp_initial_device 5018 1.1.1.11 mrg || device_num == omp_get_initial_device ()) 5019 1.1.1.11 mrg return (void *) ptr; 5020 1.1.1.11 mrg 5021 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false); 5022 1.1.1.11 mrg if (devicep == NULL) 5023 1.1.1.11 mrg return NULL; 5024 1.1.1.11 mrg 5025 1.1.1.11 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 5026 1.1.1.11 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 5027 1.1.1.11 mrg return (void *) ptr; 5028 1.1.1.11 mrg 5029 1.1.1.11 mrg gomp_mutex_lock (&devicep->lock); 5030 1.1.1.11 mrg 5031 1.1.1.11 mrg struct splay_tree_s *mem_map = &devicep->mem_map; 5032 1.1.1.11 mrg struct splay_tree_key_s cur_node; 5033 1.1.1.11 mrg void *ret = NULL; 5034 1.1.1.11 mrg 5035 1.1.1.11 mrg cur_node.host_start = (uintptr_t) ptr; 5036 1.1.1.11 mrg cur_node.host_end = cur_node.host_start; 5037 1.1.1.11 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node); 5038 1.1.1.11 mrg 5039 1.1.1.11 mrg if (n) 5040 1.1.1.11 mrg { 5041 1.1.1.11 mrg uintptr_t offset = cur_node.host_start - n->host_start; 5042 1.1.1.11 mrg ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset); 5043 1.1.1.11 mrg } 5044 1.1.1.11 mrg 5045 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock); 5046 1.1.1.11 mrg 5047 1.1.1.11 mrg return ret; 5048 1.1.1.11 mrg } 5049 1.1.1.11 mrg 5050 1.1.1.11 mrg int 5051 1.1.1.11 mrg omp_target_is_accessible (const void *ptr, size_t size, int device_num) 5052 1.1.1.11 mrg { 5053 1.1.1.11 mrg if (device_num == omp_initial_device 5054 1.1.1.11 mrg || device_num == gomp_get_num_devices ()) 5055 1.1.1.11 mrg return true; 5056 1.1.1.11 mrg 5057 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false); 5058 1.1.1.11 mrg if (devicep == NULL) 5059 1.1.1.11 mrg return false; 5060 1.1.1.11 mrg 5061 1.1.1.11 mrg /* TODO: Unified shared memory must be handled when available. 
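Until then, to make the current approximation explicit: a call such as omp_target_is_accessible (&x, sizeof (x), dev) answers purely from the GOMP_OFFLOAD_CAP_SHARED_MEM capability bit below; PTR and SIZE do not influence the result.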
*/ 5062 1.1.1.11 mrg 5063 1.1.1.11 mrg return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM; 5064 1.1.1.11 mrg } 5065 1.1.1.11 mrg 5066 1.1.1.7 mrg int 5067 1.1.1.7 mrg omp_pause_resource (omp_pause_resource_t kind, int device_num) 5068 1.1.1.7 mrg { 5069 1.1.1.7 mrg (void) kind; 5070 1.1.1.11 mrg if (device_num == omp_initial_device 5071 1.1.1.11 mrg || device_num == gomp_get_num_devices ()) 5072 1.1.1.7 mrg return gomp_pause_host (); 5073 1.1.1.11 mrg 5074 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false); 5075 1.1.1.11 mrg if (devicep == NULL) 5076 1.1.1.7 mrg return -1; 5077 1.1.1.11 mrg 5078 1.1.1.7 mrg /* Do nothing for target devices for now. */ 5079 1.1.1.7 mrg return 0; 5080 1.1.1.7 mrg } 5081 1.1.1.7 mrg 5082 1.1.1.7 mrg int 5083 1.1.1.7 mrg omp_pause_resource_all (omp_pause_resource_t kind) 5084 1.1.1.7 mrg { 5085 1.1.1.7 mrg (void) kind; 5086 1.1.1.7 mrg if (gomp_pause_host ()) 5087 1.1.1.7 mrg return -1; 5088 1.1.1.7 mrg /* Do nothing for target devices for now. */ 5089 1.1.1.7 mrg return 0; 5090 1.1.1.7 mrg } 5091 1.1.1.7 mrg 5092 1.1.1.7 mrg ialias (omp_pause_resource) 5093 1.1.1.7 mrg ialias (omp_pause_resource_all) 5094 1.1.1.7 mrg 5095 1.1 mrg #ifdef PLUGIN_SUPPORT 5096 1.1 mrg 5097 1.1 mrg /* This function tries to load a plugin for DEVICE. The name of the plugin 5098 1.1 mrg is passed in PLUGIN_NAME. 5099 1.1 mrg The handles of the found functions are stored in the corresponding fields 5100 1.1 mrg of DEVICE. The function returns TRUE on success and FALSE otherwise. */ 5101 1.1 mrg 5102 1.1 mrg static bool 5103 1.1 mrg gomp_load_plugin_for_device (struct gomp_device_descr *device, 5104 1.1 mrg const char *plugin_name) 5105 1.1 mrg { 5106 1.1 mrg const char *err = NULL, *last_missing = NULL; 5107 1.1 mrg 5108 1.1 mrg void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); 5109 1.1 mrg if (!plugin_handle) 5110 1.1.1.10 mrg #if OFFLOAD_DEFAULTED 5111 1.1.1.10 mrg return 0; 5112 1.1.1.10 mrg #else 5113 1.1.1.2 mrg goto dl_fail; 5114 1.1.1.10 mrg #endif 5115 1.1 mrg 5116 1.1 mrg /* Check if all required functions are available in the plugin and store 5117 1.1.1.2 mrg their handles. None of the symbols can legitimately be NULL, 5118 1.1.1.2 mrg so we don't need to check dlerror all the time. */ 5119 1.1 mrg #define DLSYM(f) \ 5120 1.1.1.2 mrg if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \ 5121 1.1.1.2 mrg goto dl_fail 5122 1.1.1.2 mrg /* Similar, but missing functions are not an error. Return false if 5123 1.1.1.2 mrg failed, true otherwise.
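As a concrete illustration of the first form, DLSYM (alloc) expands (after string concatenation) to: if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc"))) goto dl_fail; the DLSYM_OPT form defined next instead evaluates to 1 or 0 and records the missing symbol name in last_missing.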
*/ 5124 1.1.1.2 mrg #define DLSYM_OPT(f, n) \ 5125 1.1.1.2 mrg ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \ 5126 1.1.1.2 mrg || (last_missing = #n, 0)) 5127 1.1.1.2 mrg 5128 1.1.1.2 mrg DLSYM (version); 5129 1.1.1.2 mrg if (device->version_func () != GOMP_VERSION) 5130 1.1.1.2 mrg { 5131 1.1.1.2 mrg err = "plugin version mismatch"; 5132 1.1.1.2 mrg goto fail; 5133 1.1.1.2 mrg } 5134 1.1 mrg 5135 1.1 mrg DLSYM (get_name); 5136 1.1 mrg DLSYM (get_caps); 5137 1.1 mrg DLSYM (get_type); 5138 1.1 mrg DLSYM (get_num_devices); 5139 1.1 mrg DLSYM (init_device); 5140 1.1 mrg DLSYM (fini_device); 5141 1.1 mrg DLSYM (load_image); 5142 1.1 mrg DLSYM (unload_image); 5143 1.1 mrg DLSYM (alloc); 5144 1.1 mrg DLSYM (free); 5145 1.1 mrg DLSYM (dev2host); 5146 1.1 mrg DLSYM (host2dev); 5147 1.1.1.11 mrg DLSYM_OPT (memcpy2d, memcpy2d); 5148 1.1.1.11 mrg DLSYM_OPT (memcpy3d, memcpy3d); 5149 1.1 mrg device->capabilities = device->get_caps_func (); 5150 1.1 mrg if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 5151 1.1.1.2 mrg { 5152 1.1.1.2 mrg DLSYM (run); 5153 1.1.1.8 mrg DLSYM_OPT (async_run, async_run); 5154 1.1.1.2 mrg DLSYM_OPT (can_run, can_run); 5155 1.1.1.2 mrg DLSYM (dev2dev); 5156 1.1.1.2 mrg } 5157 1.1 mrg if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 5158 1.1 mrg { 5159 1.1.1.3 mrg if (!DLSYM_OPT (openacc.exec, openacc_exec) 5160 1.1.1.2 mrg || !DLSYM_OPT (openacc.create_thread_data, 5161 1.1.1.2 mrg openacc_create_thread_data) 5162 1.1.1.2 mrg || !DLSYM_OPT (openacc.destroy_thread_data, 5163 1.1.1.8 mrg openacc_destroy_thread_data) 5164 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.construct, openacc_async_construct) 5165 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct) 5166 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.test, openacc_async_test) 5167 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize) 5168 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize) 5169 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.queue_callback, 5170 1.1.1.8 mrg openacc_async_queue_callback) 5171 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) 5172 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) 5173 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) 5174 1.1.1.8 mrg || !DLSYM_OPT (openacc.get_property, openacc_get_property)) 5175 1.1 mrg { 5176 1.1.1.2 mrg /* Require all the OpenACC handlers if we have 5177 1.1.1.2 mrg GOMP_OFFLOAD_CAP_OPENACC_200. */ 5178 1.1 mrg err = "plugin missing OpenACC handler function"; 5179 1.1.1.2 mrg goto fail; 5180 1.1 mrg } 5181 1.1.1.2 mrg 5182 1.1.1.2 mrg unsigned cuda = 0; 5183 1.1.1.2 mrg cuda += DLSYM_OPT (openacc.cuda.get_current_device, 5184 1.1.1.3 mrg openacc_cuda_get_current_device); 5185 1.1.1.2 mrg cuda += DLSYM_OPT (openacc.cuda.get_current_context, 5186 1.1.1.3 mrg openacc_cuda_get_current_context); 5187 1.1.1.3 mrg cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream); 5188 1.1.1.3 mrg cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream); 5189 1.1.1.2 mrg if (cuda && cuda != 4) 5190 1.1 mrg { 5191 1.1.1.2 mrg /* Make sure all the CUDA functions are there if any of them are. 
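Each successful DLSYM_OPT above contributes 1, so cuda ranges over 0..4 and only 0 or 4 are consistent; a plugin exporting, say, just openacc_cuda_get_stream and openacc_cuda_set_stream ends up with cuda == 2 and is rejected here.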
*/ 5192 1.1 mrg err = "plugin missing OpenACC CUDA handler function"; 5193 1.1.1.2 mrg goto fail; 5194 1.1 mrg } 5195 1.1 mrg } 5196 1.1 mrg #undef DLSYM 5197 1.1 mrg #undef DLSYM_OPT 5198 1.1 mrg 5199 1.1.1.2 mrg return 1; 5200 1.1.1.2 mrg 5201 1.1.1.2 mrg dl_fail: 5202 1.1.1.2 mrg err = dlerror (); 5203 1.1.1.2 mrg fail: 5204 1.1.1.2 mrg gomp_error ("while loading %s: %s", plugin_name, err); 5205 1.1.1.2 mrg if (last_missing) 5206 1.1.1.2 mrg gomp_error ("missing function was %s", last_missing); 5207 1.1.1.2 mrg if (plugin_handle) 5208 1.1.1.2 mrg dlclose (plugin_handle); 5209 1.1.1.2 mrg 5210 1.1.1.2 mrg return 0; 5211 1.1.1.2 mrg } 5212 1.1.1.2 mrg 5213 1.1.1.2 mrg /* This function finalizes all initialized devices. */ 5214 1.1.1.2 mrg 5215 1.1.1.2 mrg static void 5216 1.1.1.2 mrg gomp_target_fini (void) 5217 1.1.1.2 mrg { 5218 1.1.1.2 mrg int i; 5219 1.1.1.2 mrg for (i = 0; i < num_devices; i++) 5220 1.1 mrg { 5221 1.1.1.3 mrg bool ret = true; 5222 1.1.1.2 mrg struct gomp_device_descr *devicep = &devices[i]; 5223 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock); 5224 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED) 5225 1.1.1.8 mrg ret = gomp_fini_device (devicep); 5226 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock); 5227 1.1.1.3 mrg if (!ret) 5228 1.1.1.3 mrg gomp_fatal ("device finalization failed"); 5229 1.1 mrg } 5230 1.1 mrg } 5231 1.1 mrg 5232 1.1.1.7 mrg /* This function initializes the runtime for offloading. 5233 1.1.1.7 mrg It parses the list of offload plugins, and tries to load them. 5234 1.1.1.7 mrg On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP 5235 1.1 mrg will be set, and the array DEVICES initialized, containing descriptors for 5236 1.1 mrg corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed 5237 1.1 mrg by the others. */ 5238 1.1 mrg 5239 1.1 mrg static void 5240 1.1 mrg gomp_target_init (void) 5241 1.1 mrg { 5242 1.1 mrg const char *prefix = "libgomp-plugin-"; 5243 1.1 mrg const char *suffix = SONAME_SUFFIX (1); 5244 1.1 mrg const char *cur, *next; 5245 1.1 mrg char *plugin_name; 5246 1.1.1.10 mrg int i, new_num_devs; 5247 1.1.1.10 mrg int num_devs = 0, num_devs_openmp; 5248 1.1.1.10 mrg struct gomp_device_descr *devs = NULL; 5249 1.1 mrg 5250 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED) 5251 1.1.1.10 mrg return; 5252 1.1 mrg 5253 1.1.1.7 mrg cur = OFFLOAD_PLUGINS; 5254 1.1 mrg if (*cur) 5255 1.1 mrg do 5256 1.1 mrg { 5257 1.1 mrg struct gomp_device_descr current_device; 5258 1.1.1.6 mrg size_t prefix_len, suffix_len, cur_len; 5259 1.1 mrg 5260 1.1 mrg next = strchr (cur, ','); 5261 1.1 mrg 5262 1.1.1.6 mrg prefix_len = strlen (prefix); 5263 1.1.1.6 mrg cur_len = next ?
next - cur : strlen (cur); 5264 1.1.1.6 mrg suffix_len = strlen (suffix); 5265 1.1.1.6 mrg 5266 1.1.1.6 mrg plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1); 5267 1.1 mrg if (!plugin_name) 5268 1.1 mrg { 5269 1.1.1.10 mrg num_devs = 0; 5270 1.1 mrg break; 5271 1.1 mrg } 5272 1.1 mrg 5273 1.1.1.6 mrg memcpy (plugin_name, prefix, prefix_len); 5274 1.1.1.6 mrg memcpy (plugin_name + prefix_len, cur, cur_len); 5275 1.1.1.6 mrg memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1); 5276 1.1 mrg 5277 1.1 mrg if (gomp_load_plugin_for_device (&current_device, plugin_name)) 5278 1.1 mrg { 5279 1.1.1.11 mrg int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED; 5280 1.1.1.11 mrg new_num_devs = current_device.get_num_devices_func (omp_req); 5281 1.1.1.11 mrg if (gomp_debug_var > 0 && new_num_devs < 0) 5282 1.1.1.11 mrg { 5283 1.1.1.11 mrg bool found = false; 5284 1.1.1.11 mrg int type = current_device.get_type_func (); 5285 1.1.1.11 mrg for (int img = 0; img < num_offload_images; img++) 5286 1.1.1.11 mrg if (type == offload_images[img].type) 5287 1.1.1.11 mrg found = true; 5288 1.1.1.11 mrg if (found) 5289 1.1.1.11 mrg { 5290 1.1.1.11 mrg char buf[sizeof ("unified_address, unified_shared_memory, " 5291 1.1.1.11 mrg "reverse_offload")]; 5292 1.1.1.11 mrg gomp_requires_to_name (buf, sizeof (buf), omp_req); 5293 1.1.1.11 mrg char *name = (char *) malloc (cur_len + 1); 5294 1.1.1.11 mrg memcpy (name, cur, cur_len); 5295 1.1.1.11 mrg name[cur_len] = '\0'; 5296 1.1.1.11 mrg gomp_debug (1, 5297 1.1.1.11 mrg "%s devices present but 'omp requires %s' " 5298 1.1.1.11 mrg "cannot be fulfilled\n", name, buf); 5299 1.1.1.11 mrg free (name); 5300 1.1.1.11 mrg } 5301 1.1.1.11 mrg } 5302 1.1.1.11 mrg else if (new_num_devs >= 1) 5303 1.1 mrg { 5304 1.1 mrg /* Augment DEVICES and NUM_DEVICES. */ 5305 1.1 mrg 5306 1.1.1.10 mrg devs = realloc (devs, (num_devs + new_num_devs) 5307 1.1.1.10 mrg * sizeof (struct gomp_device_descr)); 5308 1.1.1.10 mrg if (!devs) 5309 1.1 mrg { 5310 1.1.1.10 mrg num_devs = 0; 5311 1.1 mrg free (plugin_name); 5312 1.1 mrg break; 5313 1.1 mrg } 5314 1.1 mrg 5315 1.1 mrg current_device.name = current_device.get_name_func (); 5316 1.1 mrg /* current_device.capabilities has already been set. */ 5317 1.1 mrg current_device.type = current_device.get_type_func (); 5318 1.1 mrg current_device.mem_map.root = NULL; 5319 1.1.1.11 mrg current_device.mem_map_rev.root = NULL; 5320 1.1.1.2 mrg current_device.state = GOMP_DEVICE_UNINITIALIZED; 5321 1.1.1.10 mrg for (i = 0; i < new_num_devs; i++) 5322 1.1 mrg { 5323 1.1 mrg current_device.target_id = i; 5324 1.1.1.10 mrg devs[num_devs] = current_device; 5325 1.1.1.10 mrg gomp_mutex_init (&devs[num_devs].lock); 5326 1.1.1.10 mrg num_devs++; 5327 1.1 mrg } 5328 1.1 mrg } 5329 1.1 mrg } 5330 1.1 mrg 5331 1.1 mrg free (plugin_name); 5332 1.1 mrg cur = next + 1; 5333 1.1 mrg } 5334 1.1 mrg while (next); 5335 1.1 mrg 5336 1.1 mrg /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set 5337 1.1 mrg NUM_DEVICES_OPENMP.
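For example, plugins discovered in the order acc0, omp0, acc1, omp1 are repacked by the two stable passes below into omp0, omp1, acc0, acc1, so device numbers 0 .. NUM_DEVICES_OPENMP - 1 are exactly the GOMP_OFFLOAD_CAP_OPENMP_400-capable ones.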
*/ 5338 1.1.1.10 mrg struct gomp_device_descr *devs_s 5339 1.1.1.10 mrg = malloc (num_devs * sizeof (struct gomp_device_descr)); 5340 1.1.1.10 mrg if (!devs_s) 5341 1.1.1.10 mrg { 5342 1.1.1.10 mrg num_devs = 0; 5343 1.1.1.10 mrg free (devs); 5344 1.1.1.10 mrg devs = NULL; 5345 1.1.1.10 mrg } 5346 1.1.1.10 mrg num_devs_openmp = 0; 5347 1.1.1.10 mrg for (i = 0; i < num_devs; i++) 5348 1.1.1.10 mrg if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 5349 1.1.1.10 mrg devs_s[num_devs_openmp++] = devs[i]; 5350 1.1.1.10 mrg int num_devs_after_openmp = num_devs_openmp; 5351 1.1.1.10 mrg for (i = 0; i < num_devs; i++) 5352 1.1.1.10 mrg if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 5353 1.1.1.10 mrg devs_s[num_devs_after_openmp++] = devs[i]; 5354 1.1.1.10 mrg free (devs); 5355 1.1.1.10 mrg devs = devs_s; 5356 1.1 mrg 5357 1.1.1.10 mrg for (i = 0; i < num_devs; i++) 5358 1.1 mrg { 5359 1.1 mrg /* The 'devices' array can be moved (by the realloc call) until we have 5360 1.1 mrg found all the plugins, so registering with the OpenACC runtime (which 5361 1.1 mrg takes a copy of the pointer argument) must be delayed until now. */ 5362 1.1.1.10 mrg if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 5363 1.1.1.10 mrg goacc_register (&devs[i]); 5364 1.1 mrg } 5365 1.1.1.11 mrg if (gomp_global_icv.default_device_var == INT_MIN) 5366 1.1.1.11 mrg { 5367 1.1.1.11 mrg /* This implies OMP_TARGET_OFFLOAD=mandatory. */ 5368 1.1.1.11 mrg struct gomp_icv_list *none; 5369 1.1.1.11 mrg none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX); 5370 1.1.1.11 mrg gomp_global_icv.default_device_var = (num_devs_openmp 5371 1.1.1.11 mrg ? 0 : omp_invalid_device); 5372 1.1.1.11 mrg none->icvs.default_device_var = gomp_global_icv.default_device_var; 5373 1.1.1.11 mrg } 5374 1.1.1.2 mrg 5375 1.1.1.10 mrg num_devices = num_devs; 5376 1.1.1.10 mrg num_devices_openmp = num_devs_openmp; 5377 1.1.1.10 mrg devices = devs; 5378 1.1.1.2 mrg if (atexit (gomp_target_fini) != 0) 5379 1.1.1.2 mrg gomp_fatal ("atexit failed"); 5380 1.1 mrg } 5381 1.1 mrg 5382 1.1 mrg #else /* PLUGIN_SUPPORT */ 5383 1.1 mrg /* If dlfcn.h is unavailable we always fall back to host execution. 5384 1.1 mrg GOMP_target* routines are just stubs for this case. */ 5385 1.1 mrg static void 5386 1.1 mrg gomp_target_init (void) 5387 1.1 mrg { 5388 1.1 mrg } 5389 1.1 mrg #endif /* PLUGIN_SUPPORT */ 5390
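/* Illustrative usage of the device memory entry points implemented above;
   a minimal sketch, assuming at least one offload device (number 0) and
   eliding error diagnostics.  Not part of the library itself.

     #include <omp.h>

     int
     main (void)
     {
       int host_buf[4] = { 1, 2, 3, 4 };
       int dev = 0;  // assumed present; omp_get_num_devices () tells for sure
       void *dev_buf = omp_target_alloc (sizeof (host_buf), dev);
       if (dev_buf == NULL)
         return 1;
       // Host -> device transfer, routed through the plugin's host2dev hook;
       // returns 0 on success, like the memcpy routines above.
       if (omp_target_memcpy (dev_buf, host_buf, sizeof (host_buf),
                              0, 0, dev, omp_initial_device))
         return 1;
       omp_target_free (dev_buf, dev);
       return 0;
     }  */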