/* Copyright (C) 2013-2024 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub (at) redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_static
#define splay_tree_c
#include "splay-tree.h"
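/* Explanatory note (not part of the original sources): the "reverse" splay
   tree instantiated above is keyed on device addresses rather than host
   addresses; it is what lets the library translate a device pointer back to
   the corresponding host mapping when a target region calls back to the
   host (reverse offload), see gomp_map_lookup_rev below.  */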


typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
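
/* Explanatory note (not part of the original sources): the hash table
   instantiated above is used later in this file (as "htab_t refcount_set")
   to record which reference-count words have already been adjusted while
   processing a single construct, so that OpenMP 5.0's rule of adjusting a
   mapping's reference count at most once per construct can be enforced;
   see gomp_increment_refcount and gomp_decrement_refcount below.  */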

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* OpenMP 'requires' clauses mask.  */
static int omp_requires_mask;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                                 : omp_initial_device))
        return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && num_devices == 0)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but only the host device is available");
      else if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


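/* Explanatory note (not part of the original sources): for a zero-length key
   (host_start == host_end), the lookup below also probes one byte past the
   end and one byte before the start, so that a zero-length array section can
   match an existing mapping that it abuts.  */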
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
   host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE (32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP (4 * 1024)
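
/* Purely illustrative example of the two thresholds above (not part of the
   original sources): with MAX_COALESCE_BUF_SIZE == 32 KiB and
   MAX_COALESCE_BUF_GAP == 4 KiB, two to-be-copied regions at device offsets
   [0, 1024) and [2048, 3072) end up in a single chunk [0, 3072), because the
   1 KiB gap between them is below the gap limit; a further region at offset
   [40960, 41984) starts a new chunk; and a single 64 KiB region is never
   added to the coalesce buffer at all and is copied directly.  */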

/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }

                  /* In an asynchronous context, verify that CBUF isn't used
                     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
                  if (__builtin_expect (aq != NULL, 0))
                    assert (ephemeral);

                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  As we've
             not been able to use CBUF, make a copy of the data into a
             temporary buffer.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
                      "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}
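
/* Illustrative note (not part of the original sources): when two mappings on
   a single construct resolve to the same reference-count word -- for example
   two members of one structure, which share a count through
   structelem_refcount_ptr -- the first call above records that word in
   REFCOUNT_SET and bumps it, and the second call finds it in the set and
   returns without incrementing again.  This is how the "at most once per
   construct" rule from the function comment is enforced.  */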

/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL
      || k->refcount == REFCOUNT_INFINITY
      || k->refcount == REFCOUNT_ACC_MAP_DATA)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
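
/* Illustrative note (not part of the original sources): for a region with a
   reference count of 1, a single decrement drives the count to zero, so the
   caller sees both *DO_COPY and *DO_REMOVE set: copy the data back and drop
   the key.  If a sibling structure-element map of the same region is
   processed later on the same construct, its refcount word is already in
   REFCOUNT_SET and already zero, so only *DO_COPY is set, matching the
   copy/removal split described in the function comment above.  */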

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  /* Attach kinds are handled separately by the callers, so KIND here should
     be neither of the two.  */
  assert (kind != GOMP_MAP_ATTACH
          && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
         address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             during the copyback to host.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}


static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf,
                  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
        cur_node.tgt_offset = cur_node.host_start;
      else
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Pointer target of array section wasn't mapped");
        }
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
        = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
         array section.  Now subtract bias to get what we want
         to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, implicit, cbuf,
                              refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, implicit, cbuf,
                                      refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, implicit, cbuf,
                                  refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp,
                     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          /* As a special case, allow attaching NULL host pointers.  This
             allows e.g. unassociated Fortran pointers to be mapped
             properly.  */
          data = 0;

          gomp_debug (1,
                      "%s: attaching NULL host pointer, target %p "
                      "(struct base %p)\n", __FUNCTION__, (void *) devptr,
                      (void *) (n->tgt->tgt_start + n->tgt_offset));

          gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                              sizeof (void *), true, cbufp);

          return;
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          if (allow_zero_length_array_sections)
            /* When allowing attachment to zero-length array sections, we
               copy the host pointer when the target region is not mapped.  */
            data = target;
          else
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("pointer target not mapped for attach");
            }
        }
      else
        data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}
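
/* Illustrative note (not part of the original sources): the two functions
   above pair up.  gomp_attach_pointer only rewrites the pointer slot on the
   device on the 0 -> 1 transition of the per-slot attach count, and
   gomp_detach_pointer only writes the host pointer value back to that slot
   on the 1 -> 0 transition (or immediately when FINALIZE forces the count to
   1 first), so nested attaches of the same slot are reference counted rather
   than repeatedly overwritten.  */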

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT
               || (kind & typemask) == GOMP_MAP_STRUCT_UNORD)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask)
                      && sizes[i] != 0)
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH
               || ((kind & typemask)
                   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_INLINED;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 NULL pointer was there and a non-NULL one has been found;
                 that permits transparent re-mapping for Fortran array
                 descriptors which were previously mapped unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, implicit,
                                  NULL, refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_INLINED;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp, false);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
1405 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE:
1406 1.1.1.2 mrg align = (size_t) 1 << (kind >> rshift);
1407 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1408 1.1.1.2 mrg tgt->list[i].offset = tgt_size;
1409 1.1.1.2 mrg len = sizes[i];
1410 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1411 1.1.1.3 mrg (void *) (tgt->tgt_start + tgt_size),
1412 1.1.1.10 mrg (void *) hostaddrs[i], len, false, cbufp);
1413 1.1.1.11 mrg /* Save the device address in hostaddrs[i] to keep it available
1414 1.1.1.11 mrg later when doing a deep firstprivate with pointer attach. */
1415 1.1.1.11 mrg hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1416 1.1.1.2 mrg tgt_size += len;
1417 1.1.1.11 mrg
1418 1.1.1.11 mrg /* If followed by GOMP_MAP_ATTACH, pointer assign this
1419 1.1.1.11 mrg firstprivate to hostaddrs[i+1], which is assumed to contain a
1420 1.1.1.11 mrg device address. */
1421 1.1.1.11 mrg if (i + 1 < mapnum
1422 1.1.1.11 mrg && (GOMP_MAP_ATTACH
1423 1.1.1.11 mrg == (typemask & get_kind (short_mapkind, kinds, i+1))))
1424 1.1.1.11 mrg {
1425 1.1.1.11 mrg uintptr_t target = (uintptr_t) hostaddrs[i];
1426 1.1.1.11 mrg void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1427 1.1.1.11 mrg /* Per
1428 1.1.1.11 mrg <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1429 1.1.1.11 mrg "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1430 1.1.1.11 mrg this probably needs revision for 'aq' usage. */
1431 1.1.1.11 mrg assert (!aq);
1432 1.1.1.11 mrg gomp_copy_host2dev (devicep, aq, devptr, &target,
1433 1.1.1.11 mrg sizeof (void *), false, cbufp);
1434 1.1.1.11 mrg ++i;
1435 1.1.1.11 mrg }
1436 1.1.1.2 mrg continue;
1437 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE_INT:
1438 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1439 1.1.1.2 mrg continue;
1440 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1441 1.1.1.8 mrg /* The OpenACC 'host_data' construct only allows 'use_device'
1442 1.1.1.8 mrg "mapping" clauses, so in the first loop, 'not_found_cnt'
1443 1.1.1.8 mrg must always have been zero, so all OpenACC 'use_device'
1444 1.1.1.8 mrg clauses have already been handled. (We can only easily test
1445 1.1.1.8 mrg 'use_device' with 'if_present' clause here.) */
1446 1.1.1.8 mrg assert (tgt->list[i].offset == OFFSET_INLINED);
1447 1.1.1.8 mrg /* Nevertheless, FALLTHRU to the normal handling, to keep the
1448 1.1.1.8 mrg code conceptually simple, similar to the first loop. */
1449 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR:
1450 1.1.1.8 mrg if (tgt->list[i].offset == 0)
1451 1.1.1.8 mrg {
1452 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1453 1.1.1.8 mrg cur_node.host_end = cur_node.host_start;
1454 1.1.1.8 mrg n = gomp_map_lookup (mem_map, &cur_node);
1455 1.1.1.8 mrg if (n != NULL)
1456 1.1.1.8 mrg {
1457 1.1.1.8 mrg cur_node.host_start -= n->host_start;
1458 1.1.1.8 mrg hostaddrs[i]
1459 1.1.1.8 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset
1460 1.1.1.8 mrg + cur_node.host_start);
1461 1.1.1.8 mrg }
1462 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1463 1.1.1.8 mrg {
1464 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
1465 1.1.1.8 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped");
1466 1.1.1.8 mrg }
1467 1.1.1.8 mrg else if ((kind & typemask)
1468 1.1.1.8 mrg == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1469 1.1.1.8 mrg /* If not present, continue using the host address. */
1470 1.1.1.8 mrg ;
1471 1.1.1.8 mrg else
1472 1.1.1.8 mrg __builtin_unreachable ();
1473 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED;
1474 1.1.1.8 mrg }
1475 1.1.1.8 mrg continue;
1476 1.1.1.11 mrg case GOMP_MAP_STRUCT_UNORD:
1477 1.1.1.11 mrg if (sizes[i] > 1)
1478 1.1.1.11 mrg {
1479 1.1.1.11 mrg void *first = hostaddrs[i + 1];
1480 1.1.1.11 mrg for (size_t j = i + 1; j < i + sizes[i]; j++)
1481 1.1.1.11 mrg if (hostaddrs[j + 1] != first)
1482 1.1.1.11 mrg {
1483 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
1484 1.1.1.11 mrg gomp_fatal ("Mapped array elements must be the "
1485 1.1.1.11 mrg "same (%p vs %p)", first,
1486 1.1.1.11 mrg hostaddrs[j + 1]);
1487 1.1.1.11 mrg }
1488 1.1.1.11 mrg }
1489 1.1.1.11 mrg /* Fallthrough. */
1490 1.1.1.2 mrg case GOMP_MAP_STRUCT:
1491 1.1.1.2 mrg first = i + 1;
1492 1.1.1.2 mrg last = i + sizes[i];
1493 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1494 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
1495 1.1.1.2 mrg + sizes[last];
1496 1.1.1.2 mrg if (tgt->list[first].key != NULL)
1497 1.1.1.2 mrg continue;
1498 1.1.1.11 mrg if (sizes[last] == 0)
1499 1.1.1.11 mrg cur_node.host_end++;
1500 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
1501 1.1.1.11 mrg if (sizes[last] == 0)
1502 1.1.1.11 mrg cur_node.host_end--;
1503 1.1.1.11 mrg if (n == NULL && cur_node.host_start == cur_node.host_end)
1504 1.1.1.11 mrg {
1505 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
1506 1.1.1.11 mrg gomp_fatal ("Struct pointer member not mapped (%p)",
1507 1.1.1.11 mrg (void*) hostaddrs[first]);
1508 1.1.1.11 mrg }
1509 1.1.1.2 mrg if (n == NULL)
1510 1.1.1.2 mrg {
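/* Align the whole struct block rather than just its first mapped field:
   temporarily subtract the offset of that field within the struct, round
   tgt_size up to the requested alignment, then add the offset back.  */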
1511 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
1512 1.1.1.2 mrg tgt_size -= (uintptr_t) hostaddrs[first]
1513 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
1514 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1515 1.1.1.2 mrg tgt_size += (uintptr_t) hostaddrs[first]
1516 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
1517 1.1.1.2 mrg field_tgt_base = (uintptr_t) hostaddrs[first];
1518 1.1.1.2 mrg field_tgt_offset = tgt_size;
1519 1.1.1.2 mrg field_tgt_clear = last;
1520 1.1.1.10 mrg field_tgt_structelem_first = NULL;
1521 1.1.1.2 mrg tgt_size += cur_node.host_end
1522 1.1.1.2 mrg - (uintptr_t) hostaddrs[first];
1523 1.1.1.2 mrg continue;
1524 1.1.1.2 mrg }
1525 1.1.1.2 mrg for (i = first; i <= last; i++)
1526 1.1.1.8 mrg gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1527 1.1.1.10 mrg sizes, kinds, cbufp, refcount_set);
1528 1.1.1.2 mrg i--;
1529 1.1.1.2 mrg continue;
1530 1.1.1.2 mrg case GOMP_MAP_ALWAYS_POINTER:
1531 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1532 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
1533 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
1534 1.1.1.2 mrg if (n == NULL
1535 1.1.1.2 mrg || n->host_start > cur_node.host_start
1536 1.1.1.2 mrg || n->host_end < cur_node.host_end)
1537 1.1.1.2 mrg {
1538 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1539 1.1.1.2 mrg gomp_fatal ("always pointer not mapped");
1540 1.1.1.2 mrg }
1541 1.1.1.11 mrg if (i > 0
1542 1.1.1.11 mrg && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1543 1.1.1.11 mrg != GOMP_MAP_ALWAYS_POINTER))
1544 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1545 1.1.1.2 mrg if (cur_node.tgt_offset)
1546 1.1.1.2 mrg cur_node.tgt_offset -= sizes[i];
1547 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1548 1.1.1.3 mrg (void *) (n->tgt->tgt_start
1549 1.1.1.3 mrg + n->tgt_offset
1550 1.1.1.3 mrg + cur_node.host_start
1551 1.1.1.3 mrg - n->host_start),
1552 1.1.1.3 mrg (void *) &cur_node.tgt_offset,
1553 1.1.1.10 mrg sizeof (void *), true, cbufp);
1554 1.1.1.2 mrg cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1555 1.1.1.2 mrg + cur_node.host_start - n->host_start;
1556 1.1.1.2 mrg continue;
1557 1.1.1.8 mrg case GOMP_MAP_IF_PRESENT:
1558 1.1.1.8 mrg /* Not present - otherwise handled above. Skip over its
1559 1.1.1.8 mrg MAP_POINTER as well. */
1560 1.1.1.8 mrg if (i + 1 < mapnum
1561 1.1.1.8 mrg && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1562 1.1.1.8 mrg == GOMP_MAP_POINTER))
1563 1.1.1.8 mrg ++i;
1564 1.1.1.8 mrg continue;
1565 1.1.1.8 mrg case GOMP_MAP_ATTACH:
1566 1.1.1.10 mrg case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1567 1.1.1.8 mrg {
1568 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1569 1.1.1.8 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
1570 1.1.1.8 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1571 1.1.1.8 mrg if (n != NULL)
1572 1.1.1.8 mrg {
1573 1.1.1.8 mrg tgt->list[i].key = n;
1574 1.1.1.8 mrg tgt->list[i].offset = cur_node.host_start - n->host_start;
1575 1.1.1.8 mrg tgt->list[i].length = n->host_end - n->host_start;
1576 1.1.1.8 mrg tgt->list[i].copy_from = false;
1577 1.1.1.8 mrg tgt->list[i].always_copy_from = false;
1578 1.1.1.8 mrg tgt->list[i].is_attach = true;
1579 1.1.1.8 mrg /* OpenACC 'attach'/'detach' doesn't affect
1580 1.1.1.8 mrg structured/dynamic reference counts ('n->refcount',
1581 1.1.1.8 mrg 'n->dynamic_refcount'). */
1582 1.1.1.10 mrg
1583 1.1.1.10 mrg bool zlas
1584 1.1.1.10 mrg = ((kind & typemask)
1585 1.1.1.10 mrg == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1586 1.1.1.10 mrg gomp_attach_pointer (devicep, aq, mem_map, n,
1587 1.1.1.10 mrg (uintptr_t) hostaddrs[i], sizes[i],
1588 1.1.1.10 mrg cbufp, zlas);
1589 1.1.1.8 mrg }
1590 1.1.1.10 mrg else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1591 1.1.1.8 mrg {
1592 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
1593 1.1.1.8 mrg gomp_fatal ("outer struct not mapped for attach");
1594 1.1.1.8 mrg }
1595 1.1.1.8 mrg continue;
1596 1.1.1.8 mrg }
1597 1.1.1.2 mrg default:
1598 1.1.1.2 mrg break;
1599 1.1.1.2 mrg }
1600 1.1 mrg splay_tree_key k = &array->key;
1601 1.1 mrg k->host_start = (uintptr_t) hostaddrs[i];
1602 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
1603 1.1 mrg k->host_end = k->host_start + sizes[i];
1604 1.1 mrg else
1605 1.1 mrg k->host_end = k->host_start + sizeof (void *);
1606 1.1 mrg splay_tree_key n = splay_tree_lookup (mem_map, k);
1607 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
1608 1.1.1.11 mrg {
1609 1.1.1.11 mrg if (field_tgt_clear != FIELD_TGT_EMPTY)
1610 1.1.1.11 mrg {
1611 1.1.1.11 mrg /* For this condition to be true, there must be a
1612 1.1.1.11 mrg duplicate struct element mapping. This can happen with
1613 1.1.1.11 mrg GOMP_MAP_STRUCT_UNORD mappings, for example. */
1614 1.1.1.11 mrg tgt->list[i].key = n;
1615 1.1.1.11 mrg if (openmp_p)
1616 1.1.1.11 mrg {
1617 1.1.1.11 mrg assert ((n->refcount & REFCOUNT_STRUCTELEM) != 0);
1618 1.1.1.11 mrg assert (field_tgt_structelem_first != NULL);
1619 1.1.1.11 mrg
1620 1.1.1.11 mrg if (i == field_tgt_clear)
1621 1.1.1.11 mrg {
1622 1.1.1.11 mrg n->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1623 1.1.1.11 mrg field_tgt_structelem_first = NULL;
1624 1.1.1.11 mrg }
1625 1.1.1.11 mrg }
1626 1.1.1.11 mrg if (i == field_tgt_clear)
1627 1.1.1.11 mrg field_tgt_clear = FIELD_TGT_EMPTY;
1628 1.1.1.11 mrg gomp_increment_refcount (n, refcount_set);
1629 1.1.1.11 mrg tgt->list[i].copy_from
1630 1.1.1.11 mrg = GOMP_MAP_COPY_FROM_P (kind & typemask);
1631 1.1.1.11 mrg tgt->list[i].always_copy_from
1632 1.1.1.11 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1633 1.1.1.11 mrg tgt->list[i].is_attach = false;
1634 1.1.1.11 mrg tgt->list[i].offset = 0;
1635 1.1.1.11 mrg tgt->list[i].length = k->host_end - k->host_start;
1636 1.1.1.11 mrg }
1637 1.1.1.11 mrg else
1638 1.1.1.11 mrg gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1639 1.1.1.11 mrg kind & typemask, false, implicit,
1640 1.1.1.11 mrg cbufp, refcount_set);
1641 1.1.1.11 mrg }
1642 1.1 mrg else
1643 1.1 mrg {
1644 1.1.1.8 mrg k->aux = NULL;
1645 1.1.1.2 mrg if (n && n->refcount == REFCOUNT_LINK)
1646 1.1.1.2 mrg {
1647 1.1.1.2 mrg /* Replace target address of the pointer with target address
1648 1.1.1.2 mrg of mapped object in the splay tree. */
1649 1.1.1.2 mrg splay_tree_remove (mem_map, n);
1650 1.1.1.8 mrg k->aux
1651 1.1.1.8 mrg = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1652 1.1.1.8 mrg k->aux->link_key = n;
1653 1.1.1.2 mrg }
1654 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
1655 1.1.1.2 mrg tgt->list[i].key = k;
1656 1.1 mrg k->tgt = tgt;
1657 1.1.1.10 mrg k->refcount = 0;
1658 1.1.1.10 mrg k->dynamic_refcount = 0;
1659 1.1.1.8 mrg if (field_tgt_clear != FIELD_TGT_EMPTY)
1660 1.1.1.2 mrg {
1661 1.1.1.2 mrg k->tgt_offset = k->host_start - field_tgt_base
1662 1.1.1.2 mrg + field_tgt_offset;
1663 1.1.1.10 mrg if (openmp_p)
1664 1.1.1.10 mrg {
1665 1.1.1.10 mrg k->refcount = REFCOUNT_STRUCTELEM;
1666 1.1.1.10 mrg if (field_tgt_structelem_first == NULL)
1667 1.1.1.10 mrg {
1668 1.1.1.10 mrg /* Set to first structure element of sequence. */
1669 1.1.1.10 mrg k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1670 1.1.1.10 mrg field_tgt_structelem_first = k;
1671 1.1.1.10 mrg }
1672 1.1.1.10 mrg else
1673 1.1.1.10 mrg /* Point to refcount of leading element, but do not
1674 1.1.1.10 mrg increment again. */
1675 1.1.1.10 mrg k->structelem_refcount_ptr
1676 1.1.1.10 mrg = &field_tgt_structelem_first->structelem_refcount;
1677 1.1.1.10 mrg
1678 1.1.1.10 mrg if (i == field_tgt_clear)
1679 1.1.1.10 mrg {
1680 1.1.1.10 mrg k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1681 1.1.1.10 mrg field_tgt_structelem_first = NULL;
1682 1.1.1.10 mrg }
1683 1.1.1.10 mrg }
1684 1.1.1.2 mrg if (i == field_tgt_clear)
1685 1.1.1.8 mrg field_tgt_clear = FIELD_TGT_EMPTY;
1686 1.1.1.2 mrg }
1687 1.1.1.2 mrg else
1688 1.1.1.2 mrg {
1689 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1690 1.1.1.2 mrg k->tgt_offset = tgt_size;
1691 1.1.1.2 mrg tgt_size += k->host_end - k->host_start;
1692 1.1.1.2 mrg }
1693 1.1.1.10 mrg /* First increment, from 0 to 1. gomp_increment_refcount
1694 1.1.1.10 mrg encapsulates the different increment cases, so use this
1695 1.1.1.10 mrg instead of directly setting 1 during initialization. */
1696 1.1.1.10 mrg gomp_increment_refcount (k, refcount_set);
1697 1.1.1.10 mrg
1698 1.1.1.2 mrg tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1699 1.1.1.2 mrg tgt->list[i].always_copy_from
1700 1.1.1.2 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1701 1.1.1.8 mrg tgt->list[i].is_attach = false;
1702 1.1.1.2 mrg tgt->list[i].offset = 0;
1703 1.1.1.2 mrg tgt->list[i].length = k->host_end - k->host_start;
1704 1.1 mrg tgt->refcount++;
1705 1.1 mrg array->left = NULL;
1706 1.1 mrg array->right = NULL;
1707 1.1 mrg splay_tree_insert (mem_map, array);
1708 1.1 mrg switch (kind & typemask)
1709 1.1 mrg {
1710 1.1 mrg case GOMP_MAP_ALLOC:
1711 1.1 mrg case GOMP_MAP_FROM:
1712 1.1 mrg case GOMP_MAP_FORCE_ALLOC:
1713 1.1 mrg case GOMP_MAP_FORCE_FROM:
1714 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
1715 1.1 mrg break;
1716 1.1 mrg case GOMP_MAP_TO:
1717 1.1 mrg case GOMP_MAP_TOFROM:
1718 1.1 mrg case GOMP_MAP_FORCE_TO:
1719 1.1 mrg case GOMP_MAP_FORCE_TOFROM:
1720 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TO:
1721 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TOFROM:
1722 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1723 1.1.1.3 mrg (void *) (tgt->tgt_start
1724 1.1.1.3 mrg + k->tgt_offset),
1725 1.1.1.3 mrg (void *) k->host_start,
1726 1.1.1.10 mrg k->host_end - k->host_start,
1727 1.1.1.10 mrg false, cbufp);
1728 1.1 mrg break;
1729 1.1 mrg case GOMP_MAP_POINTER:
1730 1.1.1.10 mrg case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1731 1.1.1.10 mrg gomp_map_pointer
1732 1.1.1.10 mrg (tgt, aq, (uintptr_t) *(void **) k->host_start,
1733 1.1.1.10 mrg k->tgt_offset, sizes[i], cbufp,
1734 1.1.1.10 mrg ((kind & typemask)
1735 1.1.1.10 mrg == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1736 1.1 mrg break;
1737 1.1 mrg case GOMP_MAP_TO_PSET:
1738 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1739 1.1.1.3 mrg (void *) (tgt->tgt_start
1740 1.1.1.3 mrg + k->tgt_offset),
1741 1.1.1.3 mrg (void *) k->host_start,
1742 1.1.1.10 mrg k->host_end - k->host_start,
1743 1.1.1.10 mrg false, cbufp);
1744 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc = false;
1745 1.1 mrg
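/* A TO_PSET entry (a "pointer set", e.g. a Fortran array descriptor) is
   followed by the pointer members it contains; map each following pointer
   that lies within the descriptor's host range and translate it to the
   corresponding device address.  */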
1746 1.1 mrg for (j = i + 1; j < mapnum; j++)
1747 1.1.1.10 mrg {
1748 1.1.1.10 mrg int ptr_kind = (get_kind (short_mapkind, kinds, j)
1749 1.1.1.10 mrg & typemask);
1750 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P (ptr_kind)
1751 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1752 1.1.1.10 mrg break;
1753 1.1.1.10 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start
1754 1.1.1.10 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1755 1.1.1.10 mrg > k->host_end))
1756 1.1.1.10 mrg break;
1757 1.1.1.10 mrg else
1758 1.1.1.10 mrg {
1759 1.1.1.10 mrg tgt->list[j].key = k;
1760 1.1.1.10 mrg tgt->list[j].copy_from = false;
1761 1.1.1.10 mrg tgt->list[j].always_copy_from = false;
1762 1.1.1.10 mrg tgt->list[j].is_attach = false;
1763 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1764 1.1.1.10 mrg /* For OpenMP, the use of refcount_sets causes
1765 1.1.1.10 mrg errors if we set k->refcount = 1 above but also
1766 1.1.1.10 mrg increment it again here, because the later
1767 1.1.1.10 mrg decrement would not match, since we decrement only
1768 1.1.1.10 mrg once for each key's refcount. Therefore avoid this
1769 1.1.1.10 mrg increment for OpenMP constructs. */
1770 1.1.1.10 mrg if (!openmp_p)
1771 1.1.1.10 mrg gomp_increment_refcount (k, refcount_set);
1772 1.1.1.10 mrg gomp_map_pointer (tgt, aq,
1773 1.1.1.10 mrg (uintptr_t) *(void **) hostaddrs[j],
1774 1.1.1.10 mrg k->tgt_offset
1775 1.1.1.10 mrg + ((uintptr_t) hostaddrs[j]
1776 1.1.1.10 mrg - k->host_start),
1777 1.1.1.10 mrg sizes[j], cbufp, false);
1778 1.1.1.10 mrg }
1779 1.1.1.11 mrg }
1780 1.1.1.10 mrg i = j - 1;
1781 1.1 mrg break;
1782 1.1 mrg case GOMP_MAP_FORCE_PRESENT:
1783 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TO:
1784 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_FROM:
1785 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
1786 1.1 mrg {
1787 1.1 mrg /* We already looked up the memory region above and it
1788 1.1 mrg was missing. */
1789 1.1 mrg size_t size = k->host_end - k->host_start;
1790 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1791 1.1 mrg #ifdef HAVE_INTTYPES_H
1792 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device "
1793 1.1.1.11 mrg "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
1794 1.1.1.11 mrg "dev: %d)", (void *) k->host_start,
1795 1.1.1.11 mrg (uint64_t) size, (uint64_t) size,
1796 1.1.1.11 mrg devicep->target_id);
1797 1.1 mrg #else
1798 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device "
1799 1.1.1.11 mrg "(addr: %p, size: %lu (0x%lx), dev: %d)",
1800 1.1.1.11 mrg (void *) k->host_start,
1801 1.1.1.11 mrg (unsigned long) size, (unsigned long) size,
1802 1.1.1.11 mrg devicep->target_id);
1803 1.1 mrg #endif
1804 1.1 mrg }
1805 1.1 mrg break;
1806 1.1 mrg case GOMP_MAP_FORCE_DEVICEPTR:
1807 1.1 mrg assert (k->host_end - k->host_start == sizeof (void *));
1808 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1809 1.1.1.3 mrg (void *) (tgt->tgt_start
1810 1.1.1.3 mrg + k->tgt_offset),
1811 1.1.1.3 mrg (void *) k->host_start,
1812 1.1.1.10 mrg sizeof (void *), false, cbufp);
1813 1.1 mrg break;
1814 1.1 mrg default:
1815 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1816 1.1 mrg gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1817 1.1 mrg kind);
1818 1.1 mrg }
1819 1.1.1.2 mrg
1820 1.1.1.8 mrg if (k->aux && k->aux->link_key)
1821 1.1.1.2 mrg {
1822 1.1.1.2 mrg /* Set link pointer on target to the device address of the
1823 1.1.1.2 mrg mapped object. */
1824 1.1.1.2 mrg void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1825 1.1.1.7 mrg /* We intentionally do not use coalescing here, as it's not
1826 1.1.1.7 mrg data allocated by the current call to this function. */
1827 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1828 1.1.1.10 mrg &tgt_addr, sizeof (void *), true, NULL);
1829 1.1.1.2 mrg }
1830 1.1 mrg array++;
1831 1.1 mrg }
1832 1.1 mrg }
1833 1.1 mrg }
1834 1.1 mrg
1835 1.1.1.11 mrg if (pragma_kind & GOMP_MAP_VARS_TARGET)
1836 1.1 mrg {
1837 1.1 mrg for (i = 0; i < mapnum; i++)
1838 1.1 mrg {
1839 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1840 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1841 1.1.1.3 mrg (void *) (tgt->tgt_start + i * sizeof (void *)),
1842 1.1.1.6 mrg (void *) &cur_node.tgt_offset, sizeof (void *),
1843 1.1.1.10 mrg true, cbufp);
1844 1.1 mrg }
1845 1.1 mrg }
1846 1.1 mrg
1847 1.1.1.6 mrg if (cbufp)
1848 1.1.1.6 mrg {
1849 1.1.1.6 mrg long c = 0;
1850 1.1.1.6 mrg for (c = 0; c < cbuf.chunk_cnt; ++c)
1851 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1852 1.1.1.7 mrg (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1853 1.1.1.7 mrg (char *) cbuf.buf + (cbuf.chunks[c].start
1854 1.1.1.7 mrg - cbuf.chunks[0].start),
1855 1.1.1.10 mrg cbuf.chunks[c].end - cbuf.chunks[c].start,
1856 1.1.1.11 mrg false, NULL);
1857 1.1.1.11 mrg if (aq)
1858 1.1.1.11 mrg /* Free once the transfer has completed. */
1859 1.1.1.11 mrg devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
1860 1.1.1.11 mrg else
1861 1.1.1.11 mrg free (cbuf.buf);
1862 1.1.1.7 mrg cbuf.buf = NULL;
1863 1.1.1.7 mrg cbufp = NULL;
1864 1.1.1.6 mrg }
1865 1.1.1.6 mrg
1866 1.1.1.2 mrg /* If the variable from "omp target enter data" map-list was already mapped,
1867 1.1.1.2 mrg tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1868 1.1.1.2 mrg gomp_exit_data. */
1869 1.1.1.10 mrg if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1870 1.1.1.2 mrg {
1871 1.1.1.2 mrg free (tgt);
1872 1.1.1.2 mrg tgt = NULL;
1873 1.1.1.2 mrg }
1874 1.1.1.2 mrg
1875 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1876 1.1 mrg return tgt;
1877 1.1 mrg }
1878 1.1 mrg
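/* Map MAPNUM host address ranges described by HOSTADDRS, SIZES and KINDS
   onto DEVICEP.  This wrapper creates a temporary refcount set when the
   caller does not supply one and otherwise defers to
   gomp_map_vars_internal.  */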
1879 1.1.1.10 mrg static struct target_mem_desc *
1880 1.1.1.8 mrg gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1881 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1882 1.1.1.10 mrg bool short_mapkind, htab_t *refcount_set,
1883 1.1.1.10 mrg enum gomp_map_vars_kind pragma_kind)
1884 1.1.1.8 mrg {
1885 1.1.1.10 mrg /* This management of a local refcount_set is for convenience of callers
1886 1.1.1.10 mrg who do not share a refcount_set over multiple map/unmap uses. */
1887 1.1.1.10 mrg htab_t local_refcount_set = NULL;
1888 1.1.1.10 mrg if (refcount_set == NULL)
1889 1.1.1.10 mrg {
1890 1.1.1.10 mrg local_refcount_set = htab_create (mapnum);
1891 1.1.1.10 mrg refcount_set = &local_refcount_set;
1892 1.1.1.10 mrg }
1893 1.1.1.10 mrg
1894 1.1.1.10 mrg struct target_mem_desc *tgt;
1895 1.1.1.10 mrg tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1896 1.1.1.10 mrg sizes, kinds, short_mapkind, refcount_set,
1897 1.1.1.10 mrg pragma_kind);
1898 1.1.1.10 mrg if (local_refcount_set)
1899 1.1.1.10 mrg htab_free (local_refcount_set);
1900 1.1.1.10 mrg
1901 1.1.1.10 mrg return tgt;
1902 1.1.1.8 mrg }
1903 1.1.1.8 mrg
1904 1.1.1.8 mrg attribute_hidden struct target_mem_desc *
1905 1.1.1.10 mrg goacc_map_vars (struct gomp_device_descr *devicep,
1906 1.1.1.10 mrg struct goacc_asyncqueue *aq, size_t mapnum,
1907 1.1.1.10 mrg void **hostaddrs, void **devaddrs, size_t *sizes,
1908 1.1.1.10 mrg void *kinds, bool short_mapkind,
1909 1.1.1.10 mrg enum gomp_map_vars_kind pragma_kind)
1910 1.1.1.8 mrg {
1911 1.1.1.8 mrg return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1912 1.1.1.10 mrg sizes, kinds, short_mapkind, NULL,
1913 1.1.1.10 mrg GOMP_MAP_VARS_OPENACC | pragma_kind);
1914 1.1.1.8 mrg }
1915 1.1.1.8 mrg
1916 1.1 mrg static void
1917 1.1 mrg gomp_unmap_tgt (struct target_mem_desc *tgt)
1918 1.1 mrg {
1919 1.1 mrg /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1920 1.1 mrg if (tgt->tgt_end)
1921 1.1.1.3 mrg gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1922 1.1 mrg
1923 1.1 mrg free (tgt->array);
1924 1.1 mrg free (tgt);
1925 1.1 mrg }
1926 1.1 mrg
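/* Drop one reference to the target_mem_desc that PTR points to; when it
   was the last reference, unmap and free it.  Returns whether it was
   actually unmapped.  */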
1927 1.1.1.8 mrg static bool
1928 1.1.1.8 mrg gomp_unref_tgt (void *ptr)
1929 1.1.1.7 mrg {
1930 1.1.1.7 mrg bool is_tgt_unmapped = false;
1931 1.1.1.8 mrg
1932 1.1.1.8 mrg struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1933 1.1.1.8 mrg
1934 1.1.1.8 mrg if (tgt->refcount > 1)
1935 1.1.1.8 mrg tgt->refcount--;
1936 1.1.1.7 mrg else
1937 1.1.1.7 mrg {
1938 1.1.1.8 mrg gomp_unmap_tgt (tgt);
1939 1.1.1.7 mrg is_tgt_unmapped = true;
1940 1.1.1.7 mrg }
1941 1.1.1.8 mrg
1942 1.1.1.8 mrg return is_tgt_unmapped;
1943 1.1.1.8 mrg }
1944 1.1.1.8 mrg
1945 1.1.1.8 mrg static void
1946 1.1.1.8 mrg gomp_unref_tgt_void (void *ptr)
1947 1.1.1.8 mrg {
1948 1.1.1.8 mrg (void) gomp_unref_tgt (ptr);
1949 1.1.1.8 mrg }
1950 1.1.1.8 mrg
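/* Remove key K from splay tree SP, re-inserting the original 'omp declare
   target link' key if K had replaced one, and free K's auxiliary data.  */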
1951 1.1.1.10 mrg static void
1952 1.1.1.10 mrg gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1953 1.1.1.8 mrg {
1954 1.1.1.10 mrg splay_tree_remove (sp, k);
1955 1.1.1.8 mrg if (k->aux)
1956 1.1.1.8 mrg {
1957 1.1.1.8 mrg if (k->aux->link_key)
1958 1.1.1.10 mrg splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1959 1.1.1.8 mrg if (k->aux->attach_count)
1960 1.1.1.8 mrg free (k->aux->attach_count);
1961 1.1.1.8 mrg free (k->aux);
1962 1.1.1.8 mrg k->aux = NULL;
1963 1.1.1.8 mrg }
1964 1.1.1.10 mrg }
1965 1.1.1.10 mrg
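/* Remove the mapping of K from DEVICEP's memory map.  For structure-element
   mappings the whole sibling sequence is removed, starting from its _FIRST
   element.  With a non-NULL AQ the containing target_mem_desc is only
   released once the asynchronous queue has drained; the return value says
   whether that target_mem_desc was unmapped (always false in the async
   case).  */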
1966 1.1.1.10 mrg static inline __attribute__((always_inline)) bool
1967 1.1.1.10 mrg gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1968 1.1.1.10 mrg struct goacc_asyncqueue *aq)
1969 1.1.1.10 mrg {
1970 1.1.1.10 mrg bool is_tgt_unmapped = false;
1971 1.1.1.10 mrg
1972 1.1.1.10 mrg if (REFCOUNT_STRUCTELEM_P (k->refcount))
1973 1.1.1.10 mrg {
1974 1.1.1.10 mrg if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1975 1.1.1.10 mrg /* Infer the splay_tree_key of the first structelem key using the
1976 1.1.1.10 mrg pointer to the first structelem_refcount. */
1977 1.1.1.10 mrg k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1978 1.1.1.10 mrg - offsetof (struct splay_tree_key_s,
1979 1.1.1.10 mrg structelem_refcount));
1980 1.1.1.10 mrg assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1981 1.1.1.10 mrg
1982 1.1.1.10 mrg /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1983 1.1.1.10 mrg with the splay_tree_keys embedded inside. */
1984 1.1.1.10 mrg splay_tree_node node =
1985 1.1.1.10 mrg (splay_tree_node) ((char *) k
1986 1.1.1.10 mrg - offsetof (struct splay_tree_node_s, key));
1987 1.1.1.10 mrg while (true)
1988 1.1.1.10 mrg {
1989 1.1.1.10 mrg /* Start from the _FIRST key and continue for all following
1990 1.1.1.10 mrg sibling keys. */
1991 1.1.1.10 mrg gomp_remove_splay_tree_key (&devicep->mem_map, k);
1992 1.1.1.10 mrg if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1993 1.1.1.10 mrg break;
1994 1.1.1.10 mrg else
1995 1.1.1.10 mrg k = &(++node)->key;
1996 1.1.1.10 mrg }
1997 1.1.1.10 mrg }
1998 1.1.1.10 mrg else
1999 1.1.1.10 mrg gomp_remove_splay_tree_key (&devicep->mem_map, k);
2000 1.1.1.10 mrg
2001 1.1.1.8 mrg if (aq)
2002 1.1.1.8 mrg devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2003 1.1.1.8 mrg (void *) k->tgt);
2004 1.1.1.8 mrg else
2005 1.1.1.8 mrg is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2006 1.1.1.7 mrg return is_tgt_unmapped;
2007 1.1.1.7 mrg }
2008 1.1.1.7 mrg
2009 1.1.1.8 mrg attribute_hidden bool
2010 1.1.1.8 mrg gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2011 1.1.1.8 mrg {
2012 1.1.1.8 mrg return gomp_remove_var_internal (devicep, k, NULL);
2013 1.1.1.8 mrg }
2014 1.1.1.8 mrg
2015 1.1.1.8 mrg /* Remove a variable asynchronously. This actually removes the variable
2016 1.1.1.8 mrg mapping immediately, but retains the linked target_mem_desc until the
2017 1.1.1.8 mrg asynchronous operation has completed (as it may still refer to target
2018 1.1.1.8 mrg memory). The device lock must be held before entry, and remains locked on
2019 1.1.1.8 mrg exit. */
2020 1.1.1.8 mrg
2021 1.1.1.8 mrg attribute_hidden void
2022 1.1.1.8 mrg gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2023 1.1.1.8 mrg struct goacc_asyncqueue *aq)
2024 1.1.1.8 mrg {
2025 1.1.1.8 mrg (void) gomp_remove_var_internal (devicep, k, aq);
2026 1.1.1.8 mrg }
2027 1.1.1.8 mrg
2028 1.1 mrg /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2029 1.1 mrg variables back from device to host; if it is false, it is assumed that this
2030 1.1.1.3 mrg has been done already. */
2031 1.1 mrg
2032 1.1.1.8 mrg static inline __attribute__((always_inline)) void
2033 1.1.1.8 mrg gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2034 1.1.1.10 mrg htab_t *refcount_set, struct goacc_asyncqueue *aq)
2035 1.1 mrg {
2036 1.1 mrg struct gomp_device_descr *devicep = tgt->device_descr;
2037 1.1 mrg
2038 1.1 mrg if (tgt->list_count == 0)
2039 1.1 mrg {
2040 1.1 mrg free (tgt);
2041 1.1 mrg return;
2042 1.1 mrg }
2043 1.1 mrg
2044 1.1 mrg gomp_mutex_lock (&devicep->lock);
2045 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
2046 1.1.1.2 mrg {
2047 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2048 1.1.1.2 mrg free (tgt->array);
2049 1.1.1.2 mrg free (tgt);
2050 1.1.1.2 mrg return;
2051 1.1.1.2 mrg }
2052 1.1 mrg
2053 1.1 mrg size_t i;
2054 1.1.1.8 mrg
2055 1.1.1.8 mrg /* We must perform detachments before any copies back to the host. */
2056 1.1.1.8 mrg for (i = 0; i < tgt->list_count; i++)
2057 1.1.1.8 mrg {
2058 1.1.1.8 mrg splay_tree_key k = tgt->list[i].key;
2059 1.1.1.8 mrg
2060 1.1.1.8 mrg if (k != NULL && tgt->list[i].is_attach)
2061 1.1.1.8 mrg gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2062 1.1.1.8 mrg + tgt->list[i].offset,
2063 1.1.1.8 mrg false, NULL);
2064 1.1.1.8 mrg }
2065 1.1.1.8 mrg
2066 1.1 mrg for (i = 0; i < tgt->list_count; i++)
2067 1.1.1.2 mrg {
2068 1.1.1.2 mrg splay_tree_key k = tgt->list[i].key;
2069 1.1.1.2 mrg if (k == NULL)
2070 1.1.1.2 mrg continue;
2071 1.1.1.2 mrg
2072 1.1.1.8 mrg /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2073 1.1.1.8 mrg counts ('n->refcount', 'n->dynamic_refcount'). */
2074 1.1.1.8 mrg if (tgt->list[i].is_attach)
2075 1.1.1.8 mrg continue;
2076 1.1.1.8 mrg
2077 1.1.1.10 mrg bool do_copy, do_remove;
2078 1.1.1.10 mrg gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2079 1.1.1.2 mrg
2080 1.1.1.10 mrg if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2081 1.1.1.2 mrg || tgt->list[i].always_copy_from)
2082 1.1.1.8 mrg gomp_copy_dev2host (devicep, aq,
2083 1.1.1.3 mrg (void *) (k->host_start + tgt->list[i].offset),
2084 1.1.1.3 mrg (void *) (k->tgt->tgt_start + k->tgt_offset
2085 1.1.1.3 mrg + tgt->list[i].offset),
2086 1.1.1.3 mrg tgt->list[i].length);
2087 1.1.1.10 mrg if (do_remove)
2088 1.1.1.8 mrg {
2089 1.1.1.8 mrg struct target_mem_desc *k_tgt = k->tgt;
2090 1.1.1.8 mrg bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2091 1.1.1.8 mrg /* It would be bad if TGT got unmapped while we're still iterating
2092 1.1.1.8 mrg over its LIST_COUNT and still expect to use it in the
2093 1.1.1.8 mrg following code. */
2094 1.1.1.8 mrg assert (!is_tgt_unmapped
2095 1.1.1.8 mrg || k_tgt != tgt);
2096 1.1.1.8 mrg }
2097 1.1.1.2 mrg }
2098 1.1 mrg
2099 1.1.1.8 mrg if (aq)
2100 1.1.1.8 mrg devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2101 1.1.1.8 mrg (void *) tgt);
2102 1.1 mrg else
2103 1.1.1.8 mrg gomp_unref_tgt ((void *) tgt);
2104 1.1 mrg
2105 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2106 1.1 mrg }
2107 1.1 mrg
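/* As gomp_unmap_vars_internal above, but create a temporary refcount set
   when the caller does not supply one.  */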
2108 1.1.1.10 mrg static void
2109 1.1.1.10 mrg gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2110 1.1.1.10 mrg htab_t *refcount_set)
2111 1.1.1.8 mrg {
2112 1.1.1.10 mrg /* This management of a local refcount_set is for convenience of callers
2113 1.1.1.10 mrg who do not share a refcount_set over multiple map/unmap uses. */
2114 1.1.1.10 mrg htab_t local_refcount_set = NULL;
2115 1.1.1.10 mrg if (refcount_set == NULL)
2116 1.1.1.10 mrg {
2117 1.1.1.10 mrg local_refcount_set = htab_create (tgt->list_count);
2118 1.1.1.10 mrg refcount_set = &local_refcount_set;
2119 1.1.1.10 mrg }
2120 1.1.1.10 mrg
2121 1.1.1.10 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2122 1.1.1.10 mrg
2123 1.1.1.10 mrg if (local_refcount_set)
2124 1.1.1.10 mrg htab_free (local_refcount_set);
2125 1.1.1.8 mrg }
2126 1.1.1.8 mrg
2127 1.1.1.8 mrg attribute_hidden void
2128 1.1.1.10 mrg goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2129 1.1.1.10 mrg struct goacc_asyncqueue *aq)
2130 1.1.1.8 mrg {
2131 1.1.1.10 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2132 1.1.1.8 mrg }
2133 1.1.1.8 mrg
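/* Update data between host and device for MAPNUM host address ranges: for
   every entry with a non-zero size, copy to and/or from DEVICEP according
   to KINDS, taking care not to overwrite pointers that are still attached
   on the device.  */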
2134 1.1 mrg static void
2135 1.1 mrg gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2136 1.1.1.2 mrg size_t *sizes, void *kinds, bool short_mapkind)
2137 1.1 mrg {
2138 1.1 mrg size_t i;
2139 1.1 mrg struct splay_tree_key_s cur_node;
2140 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
2141 1.1 mrg
2142 1.1 mrg if (!devicep)
2143 1.1 mrg return;
2144 1.1 mrg
2145 1.1 mrg if (mapnum == 0)
2146 1.1 mrg return;
2147 1.1 mrg
2148 1.1 mrg gomp_mutex_lock (&devicep->lock);
2149 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
2150 1.1.1.2 mrg {
2151 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2152 1.1.1.2 mrg return;
2153 1.1.1.2 mrg }
2154 1.1.1.2 mrg
2155 1.1 mrg for (i = 0; i < mapnum; i++)
2156 1.1 mrg if (sizes[i])
2157 1.1 mrg {
2158 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
2159 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
2160 1.1 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2161 1.1 mrg if (n)
2162 1.1 mrg {
2163 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
2164 1.1 mrg if (n->host_start > cur_node.host_start
2165 1.1 mrg || n->host_end < cur_node.host_end)
2166 1.1 mrg {
2167 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2168 1.1 mrg gomp_fatal ("Trying to update [%p..%p) object when "
2169 1.1 mrg "only [%p..%p) is mapped",
2170 1.1 mrg (void *) cur_node.host_start,
2171 1.1 mrg (void *) cur_node.host_end,
2172 1.1 mrg (void *) n->host_start,
2173 1.1 mrg (void *) n->host_end);
2174 1.1 mrg }
2175 1.1.1.3 mrg
2176 1.1.1.10 mrg if (n->aux && n->aux->attach_count)
2177 1.1.1.10 mrg {
2178 1.1.1.10 mrg uintptr_t addr = cur_node.host_start;
2179 1.1.1.10 mrg while (addr < cur_node.host_end)
2180 1.1.1.10 mrg {
2181 1.1.1.10 mrg /* We have to be careful not to overwrite still attached
2182 1.1.1.10 mrg pointers during host<->device updates. */
2183 1.1.1.10 mrg size_t i = (addr - cur_node.host_start) / sizeof (void *);
2184 1.1.1.10 mrg if (n->aux->attach_count[i] == 0)
2185 1.1.1.10 mrg {
2186 1.1.1.10 mrg void *devaddr = (void *) (n->tgt->tgt_start
2187 1.1.1.10 mrg + n->tgt_offset
2188 1.1.1.10 mrg + addr - n->host_start);
2189 1.1.1.10 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask))
2190 1.1.1.10 mrg gomp_copy_host2dev (devicep, NULL,
2191 1.1.1.10 mrg devaddr, (void *) addr,
2192 1.1.1.10 mrg sizeof (void *), false, NULL);
2193 1.1.1.10 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2194 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL,
2195 1.1.1.10 mrg (void *) addr, devaddr,
2196 1.1.1.10 mrg sizeof (void *));
2197 1.1.1.10 mrg }
2198 1.1.1.10 mrg addr += sizeof (void *);
2199 1.1.1.10 mrg }
2200 1.1.1.10 mrg }
2201 1.1.1.10 mrg else
2202 1.1.1.10 mrg {
2203 1.1.1.10 mrg void *hostaddr = (void *) cur_node.host_start;
2204 1.1.1.10 mrg void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2205 1.1.1.10 mrg + cur_node.host_start
2206 1.1.1.10 mrg - n->host_start);
2207 1.1.1.10 mrg size_t size = cur_node.host_end - cur_node.host_start;
2208 1.1.1.3 mrg
2209 1.1.1.10 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask))
2210 1.1.1.10 mrg gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2211 1.1.1.10 mrg false, NULL);
2212 1.1.1.10 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2213 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2214 1.1.1.10 mrg }
2215 1.1 mrg }
2216 1.1.1.11 mrg else
2217 1.1.1.11 mrg {
2218 1.1.1.11 mrg int kind = get_kind (short_mapkind, kinds, i);
2219 1.1.1.11 mrg
2220 1.1.1.11 mrg if (GOMP_MAP_PRESENT_P (kind))
2221 1.1.1.11 mrg {
2222 1.1.1.11 mrg /* We already looked up the memory region above and it
2223 1.1.1.11 mrg was missing. */
2224 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
2225 1.1.1.11 mrg #ifdef HAVE_INTTYPES_H
2226 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device "
2227 1.1.1.11 mrg "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2228 1.1.1.11 mrg "dev: %d)", (void *) hostaddrs[i],
2229 1.1.1.11 mrg (uint64_t) sizes[i], (uint64_t) sizes[i],
2230 1.1.1.11 mrg devicep->target_id);
2231 1.1.1.11 mrg #else
2232 1.1.1.11 mrg gomp_fatal ("present clause: not present on the device "
2233 1.1.1.11 mrg "(addr: %p, size: %lu (0x%lx), dev: %d)",
2234 1.1.1.11 mrg (void *) hostaddrs[i], (unsigned long) sizes[i],
2235 1.1.1.11 mrg (unsigned long) sizes[i], devicep->target_id);
2236 1.1.1.11 mrg #endif
2237 1.1.1.11 mrg }
2238 1.1.1.11 mrg }
2239 1.1 mrg }
2240 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2241 1.1 mrg }
2242 1.1 mrg
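/* Return the entry of gomp_offload_icv_list for device number DEV_NUM, or
   NULL if no ICVs have been recorded for that device yet.  */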
2243 1.1.1.11 mrg static struct gomp_offload_icv_list *
2244 1.1.1.11 mrg gomp_get_offload_icv_item (int dev_num)
2245 1.1.1.11 mrg {
2246 1.1.1.11 mrg struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2247 1.1.1.11 mrg while (l != NULL && l->device_num != dev_num)
2248 1.1.1.11 mrg l = l->next;
2249 1.1.1.11 mrg
2250 1.1.1.11 mrg return l;
2251 1.1.1.11 mrg }
2252 1.1.1.11 mrg
2253 1.1.1.11 mrg /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2254 1.1.1.11 mrg depending on the device num and the variable hierarchy
2255 1.1.1.11 mrg (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2256 1.1.1.11 mrg device and thus no item with that device number is contained in
2257 1.1.1.11 mrg gomp_offload_icv_list, then a new item is created and added to the list. */
2258 1.1.1.11 mrg
2259 1.1.1.11 mrg static struct gomp_offload_icvs *
2260 1.1.1.11 mrg get_gomp_offload_icvs (int dev_num)
2261 1.1.1.11 mrg {
2262 1.1.1.11 mrg struct gomp_icv_list *dev
2263 1.1.1.11 mrg = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2264 1.1.1.11 mrg struct gomp_icv_list *all
2265 1.1.1.11 mrg = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2266 1.1.1.11 mrg struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2267 1.1.1.11 mrg struct gomp_offload_icv_list *offload_icvs
2268 1.1.1.11 mrg = gomp_get_offload_icv_item (dev_num);
2269 1.1.1.11 mrg
2270 1.1.1.11 mrg if (offload_icvs != NULL)
2271 1.1.1.11 mrg return &offload_icvs->icvs;
2272 1.1.1.11 mrg
2273 1.1.1.11 mrg struct gomp_offload_icv_list *new
2274 1.1.1.11 mrg = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2275 1.1.1.11 mrg
2276 1.1.1.11 mrg new->device_num = dev_num;
2277 1.1.1.11 mrg new->icvs.device_num = dev_num;
2278 1.1.1.11 mrg new->next = gomp_offload_icv_list;
2279 1.1.1.11 mrg
2280 1.1.1.11 mrg if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2281 1.1.1.11 mrg new->icvs.nteams = dev_x->icvs.nteams_var;
2282 1.1.1.11 mrg else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2283 1.1.1.11 mrg new->icvs.nteams = dev->icvs.nteams_var;
2284 1.1.1.11 mrg else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2285 1.1.1.11 mrg new->icvs.nteams = all->icvs.nteams_var;
2286 1.1.1.11 mrg else
2287 1.1.1.11 mrg new->icvs.nteams = gomp_default_icv_values.nteams_var;
2288 1.1.1.11 mrg
2289 1.1.1.11 mrg if (dev_x != NULL
2290 1.1.1.11 mrg && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2291 1.1.1.11 mrg new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2292 1.1.1.11 mrg else if (dev != NULL
2293 1.1.1.11 mrg && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2294 1.1.1.11 mrg new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2295 1.1.1.11 mrg else if (all != NULL
2296 1.1.1.11 mrg && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2297 1.1.1.11 mrg new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2298 1.1.1.11 mrg else
2299 1.1.1.11 mrg new->icvs.teams_thread_limit
2300 1.1.1.11 mrg = gomp_default_icv_values.teams_thread_limit_var;
2301 1.1.1.11 mrg
2302 1.1.1.11 mrg if (dev_x != NULL
2303 1.1.1.11 mrg && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2304 1.1.1.11 mrg new->icvs.default_device = dev_x->icvs.default_device_var;
2305 1.1.1.11 mrg else if (dev != NULL
2306 1.1.1.11 mrg && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2307 1.1.1.11 mrg new->icvs.default_device = dev->icvs.default_device_var;
2308 1.1.1.11 mrg else if (all != NULL
2309 1.1.1.11 mrg && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2310 1.1.1.11 mrg new->icvs.default_device = all->icvs.default_device_var;
2311 1.1.1.11 mrg else
2312 1.1.1.11 mrg new->icvs.default_device = gomp_default_icv_values.default_device_var;
2313 1.1.1.11 mrg
2314 1.1.1.11 mrg gomp_offload_icv_list = new;
2315 1.1.1.11 mrg return &new->icvs;
2316 1.1.1.11 mrg }
2317 1.1.1.11 mrg
2318 1.1 mrg /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2319 1.1 mrg And insert to splay tree the mapping between addresses from HOST_TABLE and
2320 1.1.1.2 mrg from the loaded target image. We rely on the host and device compilers
2321 1.1.1.2 mrg emitting variables and functions in the same order. */
2322 1.1 mrg
2323 1.1 mrg static void
2324 1.1.1.2 mrg gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2325 1.1.1.2 mrg const void *host_table, const void *target_data,
2326 1.1.1.2 mrg bool is_register_lock)
2327 1.1 mrg {
2328 1.1 mrg void **host_func_table = ((void ***) host_table)[0];
2329 1.1 mrg void **host_funcs_end = ((void ***) host_table)[1];
2330 1.1 mrg void **host_var_table = ((void ***) host_table)[2];
2331 1.1 mrg void **host_vars_end = ((void ***) host_table)[3];
2332 1.1.1.11 mrg void **host_ind_func_table = NULL;
2333 1.1.1.11 mrg void **host_ind_funcs_end = NULL;
2334 1.1 mrg
2335 1.1.1.11 mrg if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2336 1.1.1.11 mrg {
2337 1.1.1.11 mrg host_ind_func_table = ((void ***) host_table)[4];
2338 1.1.1.11 mrg host_ind_funcs_end = ((void ***) host_table)[5];
2339 1.1.1.11 mrg }
2340 1.1.1.11 mrg
2341 1.1.1.11 mrg /* The func and ind_func tables contain only addresses; the var table
2342 1.1.1.11 mrg contains addresses and corresponding sizes. */
2343 1.1 mrg int num_funcs = host_funcs_end - host_func_table;
2344 1.1 mrg int num_vars = (host_vars_end - host_var_table) / 2;
2345 1.1.1.11 mrg int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
2346 1.1.1.10 mrg
2347 1.1 mrg /* Load image to device and get target addresses for the image. */
2348 1.1 mrg struct addr_pair *target_table = NULL;
2349 1.1.1.11 mrg uint64_t *rev_target_fn_table = NULL;
2350 1.1.1.2 mrg int i, num_target_entries;
2351 1.1.1.2 mrg
2352 1.1.1.11 mrg /* With reverse offload, also insert target-to-host address mappings. */
2353 1.1.1.11 mrg bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2354 1.1.1.11 mrg
2355 1.1.1.2 mrg num_target_entries
2356 1.1.1.2 mrg = devicep->load_image_func (devicep->target_id, version,
2357 1.1.1.11 mrg target_data, &target_table,
2358 1.1.1.11 mrg rev_lookup ? &rev_target_fn_table : NULL,
2359 1.1.1.11 mrg num_ind_funcs
2360 1.1.1.11 mrg ? (uint64_t *) host_ind_func_table : NULL);
2361 1.1 mrg
2362 1.1.1.10 mrg if (num_target_entries != num_funcs + num_vars
2363 1.1.1.11 mrg /* "+1" due to the additional ICV struct. */
2364 1.1.1.11 mrg && num_target_entries != num_funcs + num_vars + 1)
2365 1.1 mrg {
2366 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2367 1.1 mrg if (is_register_lock)
2368 1.1 mrg gomp_mutex_unlock (&register_lock);
2369 1.1.1.2 mrg gomp_fatal ("Cannot map target functions or variables"
2370 1.1.1.2 mrg " (expected %u, have %u)", num_funcs + num_vars,
2371 1.1.1.2 mrg num_target_entries);
2372 1.1 mrg }
2373 1.1 mrg
2374 1.1 mrg /* Insert host-target address mapping into splay tree. */
2375 1.1 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2376 1.1.1.11 mrg /* "+1" due to the additional ICV struct. */
2377 1.1.1.11 mrg tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2378 1.1.1.11 mrg * sizeof (*tgt->array));
2379 1.1.1.11 mrg if (rev_target_fn_table)
2380 1.1.1.11 mrg tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2381 1.1.1.11 mrg else
2382 1.1.1.11 mrg tgt->rev_array = NULL;
2383 1.1.1.2 mrg tgt->refcount = REFCOUNT_INFINITY;
2384 1.1 mrg tgt->tgt_start = 0;
2385 1.1 mrg tgt->tgt_end = 0;
2386 1.1 mrg tgt->to_free = NULL;
2387 1.1 mrg tgt->prev = NULL;
2388 1.1 mrg tgt->list_count = 0;
2389 1.1 mrg tgt->device_descr = devicep;
2390 1.1 mrg splay_tree_node array = tgt->array;
2391 1.1.1.11 mrg reverse_splay_tree_node rev_array = tgt->rev_array;
2392 1.1 mrg
2393 1.1 mrg for (i = 0; i < num_funcs; i++)
2394 1.1 mrg {
2395 1.1 mrg splay_tree_key k = &array->key;
2396 1.1 mrg k->host_start = (uintptr_t) host_func_table[i];
2397 1.1 mrg k->host_end = k->host_start + 1;
2398 1.1 mrg k->tgt = tgt;
2399 1.1 mrg k->tgt_offset = target_table[i].start;
2400 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
2401 1.1.1.7 mrg k->dynamic_refcount = 0;
2402 1.1.1.8 mrg k->aux = NULL;
2403 1.1 mrg array->left = NULL;
2404 1.1 mrg array->right = NULL;
2405 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
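/* For reverse offload, also record the device address of each host
   function so that the host function can later be looked up by its device
   address; entries with a zero device address are not inserted into the
   reverse map.  */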
2406 1.1.1.11 mrg if (rev_target_fn_table)
2407 1.1.1.11 mrg {
2408 1.1.1.11 mrg reverse_splay_tree_key k2 = &rev_array->key;
2409 1.1.1.11 mrg k2->dev = rev_target_fn_table[i];
2410 1.1.1.11 mrg k2->k = k;
2411 1.1.1.11 mrg rev_array->left = NULL;
2412 1.1.1.11 mrg rev_array->right = NULL;
2413 1.1.1.11 mrg if (k2->dev != 0)
2414 1.1.1.11 mrg reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2415 1.1.1.11 mrg rev_array++;
2416 1.1.1.11 mrg }
2417 1.1 mrg array++;
2418 1.1 mrg }
2419 1.1 mrg
2420 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
2421 1.1.1.2 mrg "omp declare target link" variables. */
2422 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2423 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
2424 1.1.1.2 mrg
2425 1.1 mrg for (i = 0; i < num_vars; i++)
2426 1.1 mrg {
2427 1.1 mrg struct addr_pair *target_var = &target_table[num_funcs + i];
2428 1.1.1.2 mrg uintptr_t target_size = target_var->end - target_var->start;
2429 1.1.1.8 mrg bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2430 1.1.1.2 mrg
2431 1.1.1.8 mrg if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2432 1.1 mrg {
2433 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2434 1.1 mrg if (is_register_lock)
2435 1.1 mrg gomp_mutex_unlock (&register_lock);
2436 1.1.1.2 mrg gomp_fatal ("Cannot map target variables (size mismatch)");
2437 1.1 mrg }
2438 1.1 mrg
2439 1.1 mrg splay_tree_key k = &array->key;
2440 1.1 mrg k->host_start = (uintptr_t) host_var_table[i * 2];
2441 1.1.1.2 mrg k->host_end
2442 1.1.1.2 mrg = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2443 1.1 mrg k->tgt = tgt;
2444 1.1 mrg k->tgt_offset = target_var->start;
2445 1.1.1.8 mrg k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2446 1.1.1.7 mrg k->dynamic_refcount = 0;
2447 1.1.1.8 mrg k->aux = NULL;
2448 1.1 mrg array->left = NULL;
2449 1.1 mrg array->right = NULL;
2450 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
2451 1.1 mrg array++;
2452 1.1 mrg }
2453 1.1 mrg
2454 1.1.1.11 mrg /* The last entry is for the ICVs variable.
2455 1.1.1.11 mrg Tolerate the case where the plugin does not return this entry. */
2456 1.1.1.10 mrg if (num_funcs + num_vars < num_target_entries)
2457 1.1.1.10 mrg {
2458 1.1.1.11 mrg struct addr_pair *var = &target_table[num_funcs + num_vars];
2459 1.1.1.11 mrg
2460 1.1.1.11 mrg /* Start address will be non-zero for the ICVs variable if
2461 1.1.1.11 mrg the variable was found in this image. */
2462 1.1.1.11 mrg if (var->start != 0)
2463 1.1.1.10 mrg {
2464 1.1.1.10 mrg /* The index of the devicep within devices[] is regarded as its
2465 1.1.1.10 mrg 'device number', which is different from the per-device type
2466 1.1.1.10 mrg devicep->target_id. */
2467 1.1.1.11 mrg int dev_num = (int) (devicep - &devices[0]);
2468 1.1.1.11 mrg struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2469 1.1.1.11 mrg size_t var_size = var->end - var->start;
2470 1.1.1.11 mrg if (var_size != sizeof (struct gomp_offload_icvs))
2471 1.1.1.10 mrg {
2472 1.1.1.10 mrg gomp_mutex_unlock (&devicep->lock);
2473 1.1.1.10 mrg if (is_register_lock)
2474 1.1.1.10 mrg gomp_mutex_unlock (&register_lock);
2475 1.1.1.11 mrg gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2476 1.1.1.10 mrg "format");
2477 1.1.1.10 mrg }
2478 1.1.1.11 mrg /* Copy the ICVs variable to its place in device memory, thereby
2479 1.1.1.11 mrg putting its device number into effect. */
2480 1.1.1.11 mrg gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2481 1.1.1.11 mrg var_size, false, NULL);
2482 1.1.1.11 mrg splay_tree_key k = &array->key;
2483 1.1.1.11 mrg k->host_start = (uintptr_t) icvs;
2484 1.1.1.11 mrg k->host_end =
2485 1.1.1.11 mrg k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2486 1.1.1.11 mrg k->tgt = tgt;
2487 1.1.1.11 mrg k->tgt_offset = var->start;
2488 1.1.1.11 mrg k->refcount = REFCOUNT_INFINITY;
2489 1.1.1.11 mrg k->dynamic_refcount = 0;
2490 1.1.1.11 mrg k->aux = NULL;
2491 1.1.1.11 mrg array->left = NULL;
2492 1.1.1.11 mrg array->right = NULL;
2493 1.1.1.11 mrg splay_tree_insert (&devicep->mem_map, array);
2494 1.1.1.11 mrg array++;
2495 1.1.1.10 mrg }
2496 1.1.1.10 mrg }
2497 1.1.1.10 mrg
2498 1.1 mrg free (target_table);
2499 1.1 mrg }
2500 1.1 mrg
2501 1.1.1.2 mrg /* Unload the mappings described by TARGET_DATA from device DEVICEP.
2502 1.1.1.2 mrg The device must be locked. */
2503 1.1.1.2 mrg
2504 1.1.1.2 mrg static void
2505 1.1.1.2 mrg gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2506 1.1.1.2 mrg unsigned version,
2507 1.1.1.2 mrg const void *host_table, const void *target_data)
2508 1.1.1.2 mrg {
2509 1.1.1.2 mrg void **host_func_table = ((void ***) host_table)[0];
2510 1.1.1.2 mrg void **host_funcs_end = ((void ***) host_table)[1];
2511 1.1.1.2 mrg void **host_var_table = ((void ***) host_table)[2];
2512 1.1.1.2 mrg void **host_vars_end = ((void ***) host_table)[3];
2513 1.1.1.2 mrg
2514 1.1.1.2 mrg /* The func table contains only addresses; the var table contains addresses
2515 1.1.1.2 mrg and corresponding sizes. */
2516 1.1.1.2 mrg int num_funcs = host_funcs_end - host_func_table;
2517 1.1.1.2 mrg int num_vars = (host_vars_end - host_var_table) / 2;
2518 1.1.1.2 mrg
2519 1.1.1.2 mrg struct splay_tree_key_s k;
2520 1.1.1.2 mrg splay_tree_key node = NULL;
2521 1.1.1.2 mrg
2522 1.1.1.2 mrg /* Find the mapping at the start of the node array. */
2523 1.1.1.2 mrg if (num_funcs || num_vars)
2524 1.1.1.2 mrg {
2525 1.1.1.2 mrg k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2526 1.1.1.2 mrg : (uintptr_t) host_var_table[0]);
2527 1.1.1.2 mrg k.host_end = k.host_start + 1;
2528 1.1.1.2 mrg node = splay_tree_lookup (&devicep->mem_map, &k);
2529 1.1.1.2 mrg }
2530 1.1.1.2 mrg
2531 1.1.1.3 mrg if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2532 1.1.1.3 mrg {
2533 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
2534 1.1.1.3 mrg gomp_fatal ("image unload fail");
2535 1.1.1.3 mrg }
2536 1.1.1.11 mrg if (devicep->mem_map_rev.root)
2537 1.1.1.11 mrg {
2538 1.1.1.11 mrg /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2539 1.1.1.11 mrg real allocation. */
2540 1.1.1.11 mrg assert (node && node->tgt && node->tgt->rev_array);
2541 1.1.1.11 mrg assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2542 1.1.1.11 mrg free (node->tgt->rev_array);
2543 1.1.1.11 mrg devicep->mem_map_rev.root = NULL;
2544 1.1.1.11 mrg }
2545 1.1.1.2 mrg
2546 1.1.1.2 mrg /* Remove mappings from splay tree. */
2547 1.1.1.2 mrg int i;
2548 1.1.1.2 mrg for (i = 0; i < num_funcs; i++)
2549 1.1.1.2 mrg {
2550 1.1.1.2 mrg k.host_start = (uintptr_t) host_func_table[i];
2551 1.1.1.2 mrg k.host_end = k.host_start + 1;
2552 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k);
2553 1.1.1.2 mrg }
2554 1.1.1.2 mrg
2555 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
2556 1.1.1.2 mrg "omp declare target link" variables. */
2557 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2558 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
2559 1.1.1.2 mrg bool is_tgt_unmapped = false;
2560 1.1.1.2 mrg
2561 1.1.1.2 mrg for (i = 0; i < num_vars; i++)
2562 1.1.1.2 mrg {
2563 1.1.1.2 mrg k.host_start = (uintptr_t) host_var_table[i * 2];
2564 1.1.1.2 mrg k.host_end
2565 1.1.1.2 mrg = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2566 1.1.1.2 mrg
2567 1.1.1.2 mrg if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2568 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k);
2569 1.1.1.2 mrg else
2570 1.1.1.2 mrg {
2571 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2572 1.1.1.7 mrg is_tgt_unmapped = gomp_remove_var (devicep, n);
2573 1.1.1.2 mrg }
2574 1.1.1.2 mrg }
2575 1.1.1.2 mrg
2576 1.1.1.2 mrg if (node && !is_tgt_unmapped)
2577 1.1.1.2 mrg {
2578 1.1.1.2 mrg free (node->tgt);
2579 1.1.1.2 mrg free (node);
2580 1.1.1.2 mrg }
2581 1.1.1.2 mrg }
2582 1.1.1.2 mrg
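/* Write a human-readable, comma-separated list of the 'omp requires'
   clauses set in REQUIRES_MASK into BUF, which provides SIZE bytes.  */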
2583 1.1.1.11 mrg static void
2584 1.1.1.11 mrg gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2585 1.1.1.11 mrg {
2586 1.1.1.11 mrg char *end = buf + size, *p = buf;
2587 1.1.1.11 mrg if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2588 1.1.1.11 mrg p += snprintf (p, end - p, "unified_address");
2589 1.1.1.11 mrg if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2590 1.1.1.11 mrg p += snprintf (p, end - p, "%sunified_shared_memory",
2591 1.1.1.11 mrg (p == buf ? "" : ", "));
2592 1.1.1.11 mrg if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2593 1.1.1.11 mrg p += snprintf (p, end - p, "%sreverse_offload",
2594 1.1.1.11 mrg (p == buf ? "" : ", "));
2595 1.1.1.11 mrg }
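
/* Illustrative sketch (not part of the build): for a compilation unit whose
   OpenMP 'requires' directive yields a mask of
   GOMP_REQUIRES_UNIFIED_SHARED_MEMORY | GOMP_REQUIRES_REVERSE_OFFLOAD, the
   helper above fills the buffer with "unified_shared_memory, reverse_offload":

     char buf[sizeof ("unified_address, unified_shared_memory, "
                      "reverse_offload")];
     gomp_requires_to_name (buf, sizeof (buf),
                            GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
                            | GOMP_REQUIRES_REVERSE_OFFLOAD);
     ... buf now holds "unified_shared_memory, reverse_offload" ...  */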
2596 1.1.1.11 mrg
2597 1.1 mrg /* This function should be called from every offload image while loading.
2598 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2599 1.1.1.11 mrg the target, and DATA. */
2600 1.1 mrg
2601 1.1 mrg void
2602 1.1.1.2 mrg GOMP_offload_register_ver (unsigned version, const void *host_table,
2603 1.1.1.11 mrg int target_type, const void *data)
2604 1.1 mrg {
2605 1.1 mrg int i;
2606 1.1.1.2 mrg
2607 1.1.1.2 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2608 1.1.1.2 mrg gomp_fatal ("Library too old for offload (version %u < %u)",
2609 1.1.1.2 mrg GOMP_VERSION, GOMP_VERSION_LIB (version));
2610 1.1.1.11 mrg
2611 1.1.1.11 mrg int omp_req;
2612 1.1.1.11 mrg const void *target_data;
2613 1.1.1.11 mrg if (GOMP_VERSION_LIB (version) > 1)
2614 1.1.1.11 mrg {
2615 1.1.1.11 mrg omp_req = (int) (size_t) ((void **) data)[0];
2616 1.1.1.11 mrg target_data = &((void **) data)[1];
2617 1.1.1.11 mrg }
2618 1.1.1.11 mrg else
2619 1.1.1.11 mrg {
2620 1.1.1.11 mrg omp_req = 0;
2621 1.1.1.11 mrg target_data = data;
2622 1.1.1.11 mrg }
2623 1.1.1.11 mrg
2624 1.1 mrg gomp_mutex_lock (&register_lock);
2625 1.1 mrg
2626 1.1.1.11 mrg if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2627 1.1.1.11 mrg {
2628 1.1.1.11 mrg char buf1[sizeof ("unified_address, unified_shared_memory, "
2629 1.1.1.11 mrg "reverse_offload")];
2630 1.1.1.11 mrg char buf2[sizeof ("unified_address, unified_shared_memory, "
2631 1.1.1.11 mrg "reverse_offload")];
2632 1.1.1.11 mrg gomp_requires_to_name (buf2, sizeof (buf2),
2633 1.1.1.11 mrg omp_req != GOMP_REQUIRES_TARGET_USED
2634 1.1.1.11 mrg ? omp_req : omp_requires_mask);
2635 1.1.1.11 mrg if (omp_req != GOMP_REQUIRES_TARGET_USED
2636 1.1.1.11 mrg && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2637 1.1.1.11 mrg {
2638 1.1.1.11 mrg gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2639 1.1.1.11 mrg gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2640 1.1.1.11 mrg "in multiple compilation units: '%s' vs. '%s'",
2641 1.1.1.11 mrg buf1, buf2);
2642 1.1.1.11 mrg }
2643 1.1.1.11 mrg else
2644 1.1.1.11 mrg gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2645 1.1.1.11 mrg "some compilation units", buf2);
2646 1.1.1.11 mrg }
2647 1.1.1.11 mrg omp_requires_mask = omp_req;
2648 1.1.1.11 mrg
2649 1.1 mrg /* Load image to all initialized devices. */
2650 1.1 mrg for (i = 0; i < num_devices; i++)
2651 1.1 mrg {
2652 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
2653 1.1 mrg gomp_mutex_lock (&devicep->lock);
2654 1.1.1.2 mrg if (devicep->type == target_type
2655 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
2656 1.1.1.2 mrg gomp_load_image_to_device (devicep, version,
2657 1.1.1.2 mrg host_table, target_data, true);
2658 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2659 1.1 mrg }
2660 1.1 mrg
2661 1.1 mrg /* Insert image to array of pending images. */
2662 1.1 mrg offload_images
2663 1.1 mrg = gomp_realloc_unlock (offload_images,
2664 1.1 mrg (num_offload_images + 1)
2665 1.1 mrg * sizeof (struct offload_image_descr));
2666 1.1.1.2 mrg offload_images[num_offload_images].version = version;
2667 1.1 mrg offload_images[num_offload_images].type = target_type;
2668 1.1 mrg offload_images[num_offload_images].host_table = host_table;
2669 1.1 mrg offload_images[num_offload_images].target_data = target_data;
2670 1.1 mrg
2671 1.1 mrg num_offload_images++;
2672 1.1 mrg gomp_mutex_unlock (&register_lock);
2673 1.1 mrg }
2674 1.1 mrg
2675 1.1.1.11 mrg /* Legacy entry point. */
2676 1.1.1.11 mrg
2677 1.1.1.2 mrg void
2678 1.1.1.2 mrg GOMP_offload_register (const void *host_table, int target_type,
2679 1.1.1.2 mrg const void *target_data)
2680 1.1.1.2 mrg {
2681 1.1.1.2 mrg GOMP_offload_register_ver (0, host_table, target_type, target_data);
2682 1.1.1.2 mrg }
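
/* Illustrative sketch (hypothetical symbol names, not part of the build):
   registration is typically performed from a constructor emitted alongside
   each offload image, with HOST_TABLE laid out as four pointers - begin and
   end of the host function table followed by begin and end of the host
   variable table, exactly as decoded at the top of
   gomp_unload_image_from_device above:

     static void *host_table[] = { __host_funcs, __host_funcs_end,
                                   __host_vars, __host_vars_end };

     static __attribute__ ((constructor)) void
     register_offload_image (void)
     {
       GOMP_offload_register_ver (version, host_table,
                                  GOMP_DEVICE_NVIDIA_PTX, image_data);
     }

   where 'version', '__host_*' and 'image_data' stand in for whatever the
   offload tooling actually provides.  */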
2683 1.1.1.2 mrg
2684 1.1 mrg /* This function should be called from every offload image while unloading.
2685 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2686 1.1.1.11 mrg the target, and DATA. */
2687 1.1 mrg
2688 1.1 mrg void
2689 1.1.1.2 mrg GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2690 1.1.1.11 mrg int target_type, const void *data)
2691 1.1 mrg {
2692 1.1 mrg int i;
2693 1.1 mrg
2694 1.1.1.11 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2695 1.1.1.11 mrg gomp_fatal ("Library too old for offload (version %u < %u)",
2696 1.1.1.11 mrg GOMP_VERSION, GOMP_VERSION_LIB (version));
2697 1.1.1.11 mrg
2698 1.1.1.11 mrg const void *target_data;
2699 1.1.1.11 mrg if (GOMP_VERSION_LIB (version) > 1)
2700 1.1.1.11 mrg target_data = &((void **) data)[1];
2701 1.1.1.11 mrg else
2702 1.1.1.11 mrg target_data = data;
2703 1.1.1.11 mrg
2704 1.1 mrg gomp_mutex_lock (&register_lock);
2705 1.1 mrg
2706 1.1 mrg /* Unload image from all initialized devices. */
2707 1.1 mrg for (i = 0; i < num_devices; i++)
2708 1.1 mrg {
2709 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
2710 1.1 mrg gomp_mutex_lock (&devicep->lock);
2711 1.1.1.2 mrg if (devicep->type == target_type
2712 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
2713 1.1.1.2 mrg gomp_unload_image_from_device (devicep, version,
2714 1.1.1.2 mrg host_table, target_data);
2715 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2716 1.1 mrg }
2717 1.1 mrg
2718 1.1 mrg /* Remove image from array of pending images. */
2719 1.1 mrg for (i = 0; i < num_offload_images; i++)
2720 1.1 mrg if (offload_images[i].target_data == target_data)
2721 1.1 mrg {
2722 1.1 mrg offload_images[i] = offload_images[--num_offload_images];
2723 1.1 mrg break;
2724 1.1 mrg }
2725 1.1 mrg
2726 1.1 mrg gomp_mutex_unlock (&register_lock);
2727 1.1 mrg }
2728 1.1 mrg
2729 1.1.1.11 mrg /* Legacy entry point. */
2730 1.1.1.11 mrg
2731 1.1.1.2 mrg void
2732 1.1.1.2 mrg GOMP_offload_unregister (const void *host_table, int target_type,
2733 1.1.1.2 mrg const void *target_data)
2734 1.1.1.2 mrg {
2735 1.1.1.2 mrg GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2736 1.1.1.2 mrg }
2737 1.1.1.2 mrg
2738 1.1 mrg /* This function initializes the target device, specified by DEVICEP. DEVICEP
2739 1.1 mrg must be locked on entry, and remains locked on return. */
2740 1.1 mrg
2741 1.1 mrg attribute_hidden void
2742 1.1 mrg gomp_init_device (struct gomp_device_descr *devicep)
2743 1.1 mrg {
2744 1.1 mrg int i;
2745 1.1.1.3 mrg if (!devicep->init_device_func (devicep->target_id))
2746 1.1.1.3 mrg {
2747 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
2748 1.1.1.3 mrg gomp_fatal ("device initialization failed");
2749 1.1.1.3 mrg }
2750 1.1 mrg
2751 1.1 mrg /* Load to the device all images registered so far.  */
2752 1.1 mrg for (i = 0; i < num_offload_images; i++)
2753 1.1 mrg {
2754 1.1 mrg struct offload_image_descr *image = &offload_images[i];
2755 1.1 mrg if (image->type == devicep->type)
2756 1.1.1.2 mrg gomp_load_image_to_device (devicep, image->version,
2757 1.1.1.2 mrg image->host_table, image->target_data,
2758 1.1.1.2 mrg false);
2759 1.1 mrg }
2760 1.1 mrg
2761 1.1.1.8 mrg /* Initialize OpenACC asynchronous queues. */
2762 1.1.1.8 mrg goacc_init_asyncqueues (devicep);
2763 1.1.1.8 mrg
2764 1.1.1.2 mrg devicep->state = GOMP_DEVICE_INITIALIZED;
2765 1.1.1.2 mrg }
2766 1.1.1.2 mrg
2767 1.1.1.8 mrg /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2768 1.1.1.8 mrg must be locked on entry, and remains locked on return. */
2769 1.1.1.8 mrg
2770 1.1.1.8 mrg attribute_hidden bool
2771 1.1.1.8 mrg gomp_fini_device (struct gomp_device_descr *devicep)
2772 1.1.1.8 mrg {
2773 1.1.1.8 mrg bool ret = goacc_fini_asyncqueues (devicep);
2774 1.1.1.8 mrg ret &= devicep->fini_device_func (devicep->target_id);
2775 1.1.1.8 mrg devicep->state = GOMP_DEVICE_FINALIZED;
2776 1.1.1.8 mrg return ret;
2777 1.1.1.8 mrg }
2778 1.1.1.8 mrg
2779 1.1.1.2 mrg attribute_hidden void
2780 1.1.1.2 mrg gomp_unload_device (struct gomp_device_descr *devicep)
2781 1.1.1.2 mrg {
2782 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
2783 1.1.1.2 mrg {
2784 1.1.1.2 mrg unsigned i;
2785 1.1.1.11 mrg
2786 1.1.1.2 mrg /* Unload from the device all images registered so far.  */
2787 1.1.1.2 mrg for (i = 0; i < num_offload_images; i++)
2788 1.1.1.2 mrg {
2789 1.1.1.2 mrg struct offload_image_descr *image = &offload_images[i];
2790 1.1.1.2 mrg if (image->type == devicep->type)
2791 1.1.1.2 mrg gomp_unload_image_from_device (devicep, image->version,
2792 1.1.1.2 mrg image->host_table,
2793 1.1.1.2 mrg image->target_data);
2794 1.1.1.2 mrg }
2795 1.1.1.2 mrg }
2796 1.1 mrg }
2797 1.1 mrg
2798 1.1.1.2 mrg /* Host fallback for GOMP_target{,_ext} routines. */
2799 1.1 mrg
2800 1.1.1.2 mrg static void
2801 1.1.1.10 mrg gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2802 1.1.1.10 mrg struct gomp_device_descr *devicep, void **args)
2803 1.1 mrg {
2804 1.1.1.2 mrg struct gomp_thread old_thr, *thr = gomp_thread ();
2805 1.1.1.10 mrg
2806 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2807 1.1.1.10 mrg && devicep != NULL)
2808 1.1.1.10 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2809 1.1.1.10 mrg "be used for offloading");
2810 1.1.1.10 mrg
2811 1.1.1.2 mrg old_thr = *thr;
2812 1.1.1.2 mrg memset (thr, '\0', sizeof (*thr));
2813 1.1.1.2 mrg if (gomp_places_list)
2814 1.1.1.2 mrg {
2815 1.1.1.2 mrg thr->place = old_thr.place;
2816 1.1.1.2 mrg thr->ts.place_partition_len = gomp_places_list_len;
2817 1.1.1.2 mrg }
2818 1.1.1.10 mrg if (args)
2819 1.1.1.10 mrg while (*args)
2820 1.1.1.10 mrg {
2821 1.1.1.10 mrg intptr_t id = (intptr_t) *args++, val;
2822 1.1.1.10 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2823 1.1.1.10 mrg val = (intptr_t) *args++;
2824 1.1.1.10 mrg else
2825 1.1.1.10 mrg val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2826 1.1.1.10 mrg if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2827 1.1.1.10 mrg continue;
2828 1.1.1.10 mrg id &= GOMP_TARGET_ARG_ID_MASK;
2829 1.1.1.10 mrg if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2830 1.1.1.10 mrg continue;
2831 1.1.1.10 mrg val = val > INT_MAX ? INT_MAX : val;
2832 1.1.1.10 mrg if (val)
2833 1.1.1.10 mrg gomp_icv (true)->thread_limit_var = val;
2834 1.1.1.10 mrg break;
2835 1.1.1.10 mrg }
2836 1.1.1.10 mrg
2837 1.1.1.2 mrg fn (hostaddrs);
2838 1.1.1.2 mrg gomp_free_thread (thr);
2839 1.1.1.2 mrg *thr = old_thr;
2840 1.1 mrg }
2841 1.1 mrg
2842 1.1.1.2 mrg /* Calculate alignment and size requirements of a private copy of data shared
2843 1.1.1.2 mrg as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2844 1.1.1.2 mrg
2845 1.1.1.2 mrg static inline void
2846 1.1.1.2 mrg calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2847 1.1.1.2 mrg unsigned short *kinds, size_t *tgt_align,
2848 1.1.1.2 mrg size_t *tgt_size)
2849 1.1.1.2 mrg {
2850 1.1.1.2 mrg size_t i;
2851 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
2852 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2853 1.1.1.2 mrg {
2854 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
2855 1.1.1.2 mrg if (*tgt_align < align)
2856 1.1.1.2 mrg *tgt_align = align;
2857 1.1.1.2 mrg *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2858 1.1.1.2 mrg *tgt_size += sizes[i];
2859 1.1.1.2 mrg }
2860 1.1.1.2 mrg }
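
/* Worked example (illustrative only): with two GOMP_MAP_FIRSTPRIVATE entries,
   a 4-byte object with 4-byte alignment (kinds[i] >> 8 == 2) followed by a
   16-byte object with 8-byte alignment (kinds[i] >> 8 == 3), the loop above
   yields *tgt_align == 8 and *tgt_size == 24: 0 is rounded up to 0 and 4 is
   added, then 4 is rounded up to 8 and 16 is added.  Callers allocate
   tgt_size + tgt_align - 1 bytes (see the gomp_alloca calls below), which is
   enough for copy_firstprivate_data to realign the block to tgt_align.  */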
2861 1.1.1.2 mrg
2862 1.1.1.2 mrg /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */
2863 1.1.1.2 mrg
2864 1.1.1.2 mrg static inline void
2865 1.1.1.2 mrg copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2866 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, size_t tgt_align,
2867 1.1.1.2 mrg size_t tgt_size)
2868 1.1.1.2 mrg {
2869 1.1.1.2 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2870 1.1.1.2 mrg if (al)
2871 1.1.1.2 mrg tgt += tgt_align - al;
2872 1.1.1.2 mrg tgt_size = 0;
2873 1.1.1.2 mrg size_t i;
2874 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
2875 1.1.1.10 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2876 1.1.1.2 mrg {
2877 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
2878 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
2879 1.1.1.2 mrg memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2880 1.1.1.2 mrg hostaddrs[i] = tgt + tgt_size;
2881 1.1.1.2 mrg tgt_size = tgt_size + sizes[i];
2882 1.1.1.11 mrg if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2883 1.1.1.11 mrg {
2884 1.1.1.11 mrg *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2885 1.1.1.11 mrg ++i;
2886 1.1.1.11 mrg }
2887 1.1.1.2 mrg }
2888 1.1.1.2 mrg }
2889 1.1.1.2 mrg
2890 1.1.1.2 mrg /* Helper function of GOMP_target{,_ext} routines. */
2891 1.1.1.2 mrg
2892 1.1.1.2 mrg static void *
2893 1.1.1.2 mrg gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2894 1.1.1.2 mrg void (*host_fn) (void *))
2895 1.1.1.2 mrg {
2896 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2897 1.1.1.2 mrg return (void *) host_fn;
2898 1.1.1.2 mrg else
2899 1.1.1.2 mrg {
2900 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2901 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
2902 1.1.1.2 mrg {
2903 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2904 1.1.1.2 mrg return NULL;
2905 1.1.1.2 mrg }
2906 1.1.1.2 mrg
2907 1.1.1.2 mrg struct splay_tree_key_s k;
2908 1.1.1.2 mrg k.host_start = (uintptr_t) host_fn;
2909 1.1.1.2 mrg k.host_end = k.host_start + 1;
2910 1.1.1.2 mrg splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2911 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2912 1.1.1.2 mrg if (tgt_fn == NULL)
2913 1.1.1.2 mrg return NULL;
2914 1.1.1.2 mrg
2915 1.1.1.2 mrg return (void *) tgt_fn->tgt_offset;
2916 1.1.1.2 mrg }
2917 1.1.1.2 mrg }
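
/* Illustrative sketch (not part of the build): device functions are keyed in
   the splay tree by the one-byte range [host_fn, host_fn + 1), matching the
   entries created at image-load time, and tgt_offset holds the device-side
   address.  A caller resolves and checks the address roughly like this
   (DEVICEP and HOST_FN being whatever is in scope):

     void *fn_addr = gomp_get_target_fn_addr (devicep, host_fn);
     if (fn_addr == NULL)
       ... fall back to host execution, as GOMP_target below does ...  */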
2918 1.1.1.2 mrg
2919 1.1.1.2 mrg /* Called when encountering a target directive. If DEVICE
2920 1.1 mrg is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2921 1.1 mrg GOMP_DEVICE_HOST_FALLBACK (or any value
2922 1.1 mrg larger than last available hw device), use host fallback.
2923 1.1 mrg FN is address of host code, UNUSED is part of the current ABI, but
2924 1.1 mrg we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2925 1.1 mrg with MAPNUM entries, with addresses of the host objects,
2926 1.1 mrg sizes of the host objects (resp. for pointer kind pointer bias
2927 1.1 mrg and assumed sizeof (void *) size) and kinds. */
2928 1.1 mrg
2929 1.1 mrg void
2930 1.1 mrg GOMP_target (int device, void (*fn) (void *), const void *unused,
2931 1.1 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
2932 1.1 mrg unsigned char *kinds)
2933 1.1 mrg {
2934 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true);
2935 1.1 mrg
2936 1.1.1.2 mrg void *fn_addr;
2937 1.1 mrg if (devicep == NULL
2938 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2939 1.1.1.2 mrg /* All shared memory devices should use the GOMP_target_ext function. */
2940 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2941 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2942 1.1.1.10 mrg return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2943 1.1.1.2 mrg
2944 1.1.1.10 mrg htab_t refcount_set = htab_create (mapnum);
2945 1.1.1.2 mrg struct target_mem_desc *tgt_vars
2946 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2947 1.1.1.10 mrg &refcount_set, GOMP_MAP_VARS_TARGET);
2948 1.1.1.2 mrg devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2949 1.1.1.2 mrg NULL);
2950 1.1.1.10 mrg htab_clear (refcount_set);
2951 1.1.1.10 mrg gomp_unmap_vars (tgt_vars, true, &refcount_set);
2952 1.1.1.10 mrg htab_free (refcount_set);
2953 1.1.1.2 mrg }
2954 1.1.1.2 mrg
2955 1.1.1.8 mrg static inline unsigned int
2956 1.1.1.8 mrg clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2957 1.1.1.8 mrg {
2958 1.1.1.8 mrg /* If we cannot run asynchronously, simply ignore nowait. */
2959 1.1.1.8 mrg if (devicep != NULL && devicep->async_run_func == NULL)
2960 1.1.1.8 mrg flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2961 1.1.1.8 mrg
2962 1.1.1.8 mrg return flags;
2963 1.1.1.8 mrg }
2964 1.1.1.8 mrg
2965 1.1.1.11 mrg static void
2966 1.1.1.11 mrg gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2967 1.1.1.11 mrg {
2968 1.1.1.11 mrg struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2969 1.1.1.11 mrg if (item == NULL)
2970 1.1.1.11 mrg return;
2971 1.1.1.11 mrg
2972 1.1.1.11 mrg void *host_ptr = &item->icvs;
2973 1.1.1.11 mrg void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2974 1.1.1.11 mrg if (dev_ptr != NULL)
2975 1.1.1.11 mrg gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2976 1.1.1.11 mrg sizeof (struct gomp_offload_icvs));
2977 1.1.1.11 mrg }
2978 1.1.1.11 mrg
2979 1.1.1.2 mrg /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2980 1.1.1.2 mrg and several arguments have been added:
2981 1.1.1.2 mrg FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2982 1.1.1.2 mrg DEPEND is array of dependencies, see GOMP_task for details.
2983 1.1.1.2 mrg
2984 1.1.1.2 mrg ARGS is a pointer to an array consisting of a variable number of both
2985 1.1.1.2 mrg device-independent and device-specific arguments; each argument takes one
2986 1.1.1.2 mrg or two elements, where the first specifies for which device it is intended,
2987 1.1.1.2 mrg the type and optionally also the value. If the value does not fit into the
2988 1.1.1.2 mrg first element, the whole second element holds the actual value. The last
2989 1.1.1.2 mrg element of the array is a single NULL. Device-independent arguments include
2990 1.1.1.2 mrg for example NUM_TEAMS and THREAD_LIMIT.
2991 1.1.1.2 mrg
2992 1.1.1.2 mrg NUM_TEAMS is positive if GOMP_teams will be called in the body with that
2993 1.1.1.2 mrg value, 1 if the teams construct is not present, 0 if the teams construct
2994 1.1.1.2 mrg does not have a num_teams clause (the choice is then implementation
2995 1.1.1.2 mrg defined), and -1 if it cannot be determined on the host what value
2996 1.1.1.2 mrg GOMP_teams will have on the device.
2997 1.1.1.2 mrg THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2998 1.1.1.2 mrg body with that value, 0 if the teams construct does not have a thread_limit
2999 1.1.1.2 mrg clause or the teams construct is not present, and -1 if it cannot be
3000 1.1.1.2 mrg determined on the host what value GOMP_teams will have on the device. */
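
/* Illustrative sketch (not part of the build): an ARGS array requesting
   num_teams == 4 and thread_limit == 32 for all devices, with both values
   small enough to be packed into the identifier element itself, could look
   like:

     void *args[] =
       { (void *) (uintptr_t) (((uintptr_t) 4 << GOMP_TARGET_ARG_VALUE_SHIFT)
                               | GOMP_TARGET_ARG_DEVICE_ALL
                               | GOMP_TARGET_ARG_NUM_TEAMS),
         (void *) (uintptr_t) (((uintptr_t) 32 << GOMP_TARGET_ARG_VALUE_SHIFT)
                               | GOMP_TARGET_ARG_DEVICE_ALL
                               | GOMP_TARGET_ARG_THREAD_LIMIT),
         NULL };

   A value that does not fit would instead set GOMP_TARGET_ARG_SUBSEQUENT_PARAM
   in the identifier and be passed as the following element.  */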
3001 1.1.1.2 mrg
3002 1.1.1.2 mrg void
3003 1.1.1.2 mrg GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
3004 1.1.1.2 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
3005 1.1.1.2 mrg unsigned int flags, void **depend, void **args)
3006 1.1.1.2 mrg {
3007 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true);
3008 1.1.1.2 mrg size_t tgt_align = 0, tgt_size = 0;
3009 1.1.1.2 mrg bool fpc_done = false;
3010 1.1.1.2 mrg
3011 1.1.1.11 mrg /* Obtain the original TEAMS and THREADS values from ARGS. */
3012 1.1.1.11 mrg intptr_t orig_teams = 1, orig_threads = 0;
3013 1.1.1.11 mrg size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
3014 1.1.1.11 mrg void **tmpargs = args;
3015 1.1.1.11 mrg while (*tmpargs)
3016 1.1.1.11 mrg {
3017 1.1.1.11 mrg intptr_t id = (intptr_t) *tmpargs++, val;
3018 1.1.1.11 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3019 1.1.1.11 mrg {
3020 1.1.1.11 mrg val = (intptr_t) *tmpargs++;
3021 1.1.1.11 mrg len = 2;
3022 1.1.1.11 mrg }
3023 1.1.1.11 mrg else
3024 1.1.1.11 mrg {
3025 1.1.1.11 mrg val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
3026 1.1.1.11 mrg len = 1;
3027 1.1.1.11 mrg }
3028 1.1.1.11 mrg num_args += len;
3029 1.1.1.11 mrg if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
3030 1.1.1.11 mrg continue;
3031 1.1.1.11 mrg val = val > INT_MAX ? INT_MAX : val;
3032 1.1.1.11 mrg if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
3033 1.1.1.11 mrg {
3034 1.1.1.11 mrg orig_teams = val;
3035 1.1.1.11 mrg teams_len = len;
3036 1.1.1.11 mrg }
3037 1.1.1.11 mrg else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
3038 1.1.1.11 mrg {
3039 1.1.1.11 mrg orig_threads = val;
3040 1.1.1.11 mrg threads_len = len;
3041 1.1.1.11 mrg }
3042 1.1.1.11 mrg }
3043 1.1.1.11 mrg
3044 1.1.1.11 mrg intptr_t new_teams = orig_teams, new_threads = orig_threads;
3045 1.1.1.11 mrg /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3046 1.1.1.11 mrg ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3047 1.1.1.11 mrg value could not be determined. No change.
3048 1.1.1.11 mrg ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3049 1.1.1.11 mrg Set device-specific value.
3050 1.1.1.11 mrg ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3051 1.1.1.11 mrg No change. */
3052 1.1.1.11 mrg if (orig_teams == -2)
3053 1.1.1.11 mrg new_teams = 1;
3054 1.1.1.11 mrg else if (orig_teams == 0)
3055 1.1.1.11 mrg {
3056 1.1.1.11 mrg struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3057 1.1.1.11 mrg if (item != NULL)
3058 1.1.1.11 mrg new_teams = item->icvs.nteams;
3059 1.1.1.11 mrg }
3060 1.1.1.11 mrg /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3061 1.1.1.11 mrg region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3062 1.1.1.11 mrg e.g. a THREAD_LIMIT clause. */
3063 1.1.1.11 mrg if (orig_teams > -2 && orig_threads == 0)
3064 1.1.1.11 mrg {
3065 1.1.1.11 mrg struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3066 1.1.1.11 mrg if (item != NULL)
3067 1.1.1.11 mrg new_threads = item->icvs.teams_thread_limit;
3068 1.1.1.11 mrg }
3069 1.1.1.11 mrg
3070 1.1.1.11 mrg /* Copy and change the arguments list only if TEAMS or THREADS need to be
3071 1.1.1.11 mrg updated. */
3072 1.1.1.11 mrg void **new_args = args;
3073 1.1.1.11 mrg if (orig_teams != new_teams || orig_threads != new_threads)
3074 1.1.1.11 mrg {
3075 1.1.1.11 mrg size_t tms_len = (orig_teams == new_teams
3076 1.1.1.11 mrg ? teams_len
3077 1.1.1.11 mrg : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3078 1.1.1.11 mrg ? 1 : 2));
3079 1.1.1.11 mrg size_t ths_len = (orig_threads == new_threads
3080 1.1.1.11 mrg ? threads_len
3081 1.1.1.11 mrg : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3082 1.1.1.11 mrg ? 1 : 2));
3083 1.1.1.11 mrg /* One additional item after the last arg must be NULL. */
3084 1.1.1.11 mrg size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3085 1.1.1.11 mrg + ths_len + 1;
3086 1.1.1.11 mrg new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3087 1.1.1.11 mrg
3088 1.1.1.11 mrg tmpargs = args;
3089 1.1.1.11 mrg void **tmp_new_args = new_args;
3090 1.1.1.11 mrg /* Copy all args except TEAMS and THREADS; those two are copied only if
3091 1.1.1.11 mrg they have not been changed, and are skipped otherwise. */
3092 1.1.1.11 mrg while (*tmpargs)
3093 1.1.1.11 mrg {
3094 1.1.1.11 mrg intptr_t id = (intptr_t) *tmpargs;
3095 1.1.1.11 mrg if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3096 1.1.1.11 mrg && orig_teams != new_teams)
3097 1.1.1.11 mrg || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3098 1.1.1.11 mrg && orig_threads != new_threads))
3099 1.1.1.11 mrg {
3100 1.1.1.11 mrg tmpargs++;
3101 1.1.1.11 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3102 1.1.1.11 mrg tmpargs++;
3103 1.1.1.11 mrg }
3104 1.1.1.11 mrg else
3105 1.1.1.11 mrg {
3106 1.1.1.11 mrg *tmp_new_args++ = *tmpargs++;
3107 1.1.1.11 mrg if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3108 1.1.1.11 mrg *tmp_new_args++ = *tmpargs++;
3109 1.1.1.11 mrg }
3110 1.1.1.11 mrg }
3111 1.1.1.11 mrg
3112 1.1.1.11 mrg /* Add the new TEAMS arg to the new args list if it has been changed. */
3113 1.1.1.11 mrg if (orig_teams != new_teams)
3114 1.1.1.11 mrg {
3115 1.1.1.11 mrg intptr_t new_val = new_teams;
3116 1.1.1.11 mrg if (tms_len == 1)
3117 1.1.1.11 mrg {
3118 1.1.1.11 mrg new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3119 1.1.1.11 mrg | GOMP_TARGET_ARG_NUM_TEAMS;
3120 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val;
3121 1.1.1.11 mrg }
3122 1.1.1.11 mrg else
3123 1.1.1.11 mrg {
3124 1.1.1.11 mrg *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3125 1.1.1.11 mrg | GOMP_TARGET_ARG_NUM_TEAMS);
3126 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val;
3127 1.1.1.11 mrg }
3128 1.1.1.11 mrg }
3129 1.1.1.11 mrg
3130 1.1.1.11 mrg /* Add the new THREADS arg to the new args list if it has been changed. */
3131 1.1.1.11 mrg if (orig_threads != new_threads)
3132 1.1.1.11 mrg {
3133 1.1.1.11 mrg intptr_t new_val = new_threads;
3134 1.1.1.11 mrg if (ths_len == 1)
3135 1.1.1.11 mrg {
3136 1.1.1.11 mrg new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3137 1.1.1.11 mrg | GOMP_TARGET_ARG_THREAD_LIMIT;
3138 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val;
3139 1.1.1.11 mrg }
3140 1.1.1.11 mrg else
3141 1.1.1.11 mrg {
3142 1.1.1.11 mrg *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3143 1.1.1.11 mrg | GOMP_TARGET_ARG_THREAD_LIMIT);
3144 1.1.1.11 mrg *tmp_new_args++ = (void *) new_val;
3145 1.1.1.11 mrg }
3146 1.1.1.11 mrg }
3147 1.1.1.11 mrg
3148 1.1.1.11 mrg *tmp_new_args = NULL;
3149 1.1.1.11 mrg }
3150 1.1.1.11 mrg
3151 1.1.1.8 mrg flags = clear_unsupported_flags (devicep, flags);
3152 1.1.1.8 mrg
3153 1.1.1.2 mrg if (flags & GOMP_TARGET_FLAG_NOWAIT)
3154 1.1 mrg {
3155 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
3156 1.1.1.2 mrg /* Create a team if we don't have any around, as nowait
3157 1.1.1.2 mrg target tasks make sense to run asynchronously even when
3158 1.1.1.2 mrg outside of any parallel. */
3159 1.1.1.2 mrg if (__builtin_expect (thr->ts.team == NULL, 0))
3160 1.1.1.2 mrg {
3161 1.1.1.2 mrg struct gomp_team *team = gomp_new_team (1);
3162 1.1.1.2 mrg struct gomp_task *task = thr->task;
3163 1.1.1.9 mrg struct gomp_task **implicit_task = &task;
3164 1.1.1.2 mrg struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3165 1.1.1.2 mrg team->prev_ts = thr->ts;
3166 1.1.1.2 mrg thr->ts.team = team;
3167 1.1.1.2 mrg thr->ts.team_id = 0;
3168 1.1.1.2 mrg thr->ts.work_share = &team->work_shares[0];
3169 1.1.1.2 mrg thr->ts.last_work_share = NULL;
3170 1.1.1.2 mrg #ifdef HAVE_SYNC_BUILTINS
3171 1.1.1.2 mrg thr->ts.single_count = 0;
3172 1.1.1.2 mrg #endif
3173 1.1.1.2 mrg thr->ts.static_trip = 0;
3174 1.1.1.2 mrg thr->task = &team->implicit_task[0];
3175 1.1.1.2 mrg gomp_init_task (thr->task, NULL, icv);
3176 1.1.1.9 mrg while (*implicit_task
3177 1.1.1.9 mrg && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3178 1.1.1.9 mrg implicit_task = &(*implicit_task)->parent;
3179 1.1.1.9 mrg if (*implicit_task)
3180 1.1.1.2 mrg {
3181 1.1.1.9 mrg thr->task = *implicit_task;
3182 1.1.1.2 mrg gomp_end_task ();
3183 1.1.1.9 mrg free (*implicit_task);
3184 1.1.1.2 mrg thr->task = &team->implicit_task[0];
3185 1.1.1.2 mrg }
3186 1.1.1.2 mrg else
3187 1.1.1.2 mrg pthread_setspecific (gomp_thread_destructor, thr);
3188 1.1.1.9 mrg if (implicit_task != &task)
3189 1.1.1.9 mrg {
3190 1.1.1.9 mrg *implicit_task = thr->task;
3191 1.1.1.9 mrg thr->task = task;
3192 1.1.1.9 mrg }
3193 1.1.1.2 mrg }
3194 1.1.1.2 mrg if (thr->ts.team
3195 1.1.1.2 mrg && !thr->task->final_task)
3196 1.1.1.2 mrg {
3197 1.1.1.2 mrg gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3198 1.1.1.11 mrg sizes, kinds, flags, depend, new_args,
3199 1.1.1.2 mrg GOMP_TARGET_TASK_BEFORE_MAP);
3200 1.1.1.2 mrg return;
3201 1.1.1.2 mrg }
3202 1.1 mrg }
3203 1.1 mrg
3204 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present
3205 1.1.1.2 mrg (or we are in a final task), block the parent task until the
3206 1.1.1.2 mrg dependencies are resolved and then just continue with the rest
3207 1.1.1.2 mrg of the function as if it is a merged task. */
3208 1.1.1.2 mrg if (depend != NULL)
3209 1.1.1.2 mrg {
3210 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
3211 1.1.1.2 mrg if (thr->task && thr->task->depend_hash)
3212 1.1.1.2 mrg {
3213 1.1.1.2 mrg /* If we might need to wait, copy firstprivate now. */
3214 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds,
3215 1.1.1.2 mrg &tgt_align, &tgt_size);
3216 1.1.1.2 mrg if (tgt_align)
3217 1.1.1.2 mrg {
3218 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3219 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3220 1.1.1.2 mrg tgt_align, tgt_size);
3221 1.1.1.2 mrg }
3222 1.1.1.2 mrg fpc_done = true;
3223 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend);
3224 1.1.1.2 mrg }
3225 1.1.1.2 mrg }
3226 1.1 mrg
3227 1.1 mrg void *fn_addr;
3228 1.1.1.2 mrg if (devicep == NULL
3229 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3230 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3231 1.1.1.2 mrg || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3232 1.1 mrg {
3233 1.1.1.2 mrg if (!fpc_done)
3234 1.1 mrg {
3235 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds,
3236 1.1.1.2 mrg &tgt_align, &tgt_size);
3237 1.1.1.2 mrg if (tgt_align)
3238 1.1.1.2 mrg {
3239 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3240 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3241 1.1.1.2 mrg tgt_align, tgt_size);
3242 1.1.1.2 mrg }
3243 1.1 mrg }
3244 1.1.1.11 mrg gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3245 1.1.1.2 mrg return;
3246 1.1.1.2 mrg }
3247 1.1 mrg
3248 1.1.1.2 mrg struct target_mem_desc *tgt_vars;
3249 1.1.1.10 mrg htab_t refcount_set = NULL;
3250 1.1.1.10 mrg
3251 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3252 1.1.1.2 mrg {
3253 1.1.1.2 mrg if (!fpc_done)
3254 1.1.1.2 mrg {
3255 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds,
3256 1.1.1.2 mrg &tgt_align, &tgt_size);
3257 1.1.1.2 mrg if (tgt_align)
3258 1.1.1.2 mrg {
3259 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3260 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3261 1.1.1.2 mrg tgt_align, tgt_size);
3262 1.1.1.2 mrg }
3263 1.1.1.2 mrg }
3264 1.1.1.2 mrg tgt_vars = NULL;
3265 1.1 mrg }
3266 1.1.1.2 mrg else
3267 1.1.1.10 mrg {
3268 1.1.1.10 mrg refcount_set = htab_create (mapnum);
3269 1.1.1.10 mrg tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3270 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_TARGET);
3271 1.1.1.10 mrg }
3272 1.1.1.2 mrg devicep->run_func (devicep->target_id, fn_addr,
3273 1.1.1.2 mrg tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3274 1.1.1.11 mrg new_args);
3275 1.1.1.2 mrg if (tgt_vars)
3276 1.1.1.10 mrg {
3277 1.1.1.10 mrg htab_clear (refcount_set);
3278 1.1.1.10 mrg gomp_unmap_vars (tgt_vars, true, &refcount_set);
3279 1.1.1.10 mrg }
3280 1.1.1.10 mrg if (refcount_set)
3281 1.1.1.10 mrg htab_free (refcount_set);
3282 1.1.1.11 mrg
3283 1.1.1.11 mrg /* Copy back ICVs from device to host.
3284 1.1.1.11 mrg HOST_PTR is expected to exist since it was added in
3285 1.1.1.11 mrg gomp_load_image_to_device if not already available. */
3286 1.1.1.11 mrg gomp_copy_back_icvs (devicep, device);
3287 1.1.1.11 mrg
3288 1.1.1.2 mrg }
3289 1.1 mrg
3290 1.1.1.2 mrg
3291 1.1.1.11 mrg /* Reverse lookup (device addr -> host addr) for reverse offload. No reverse
3292 1.1.1.11 mrg map is maintained during normal variable handling, assuming that reverse
3293 1.1.1.11 mrg offload occurs only very rarely; the downside is that this search is slow. */
3294 1.1.1.11 mrg
3295 1.1.1.11 mrg struct gomp_splay_tree_rev_lookup_data {
3296 1.1.1.11 mrg uintptr_t tgt_start;
3297 1.1.1.11 mrg uintptr_t tgt_end;
3298 1.1.1.11 mrg splay_tree_key key;
3299 1.1.1.11 mrg };
3300 1.1.1.11 mrg
3301 1.1.1.11 mrg static int
3302 1.1.1.11 mrg gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3303 1.1.1.2 mrg {
3304 1.1.1.11 mrg struct gomp_splay_tree_rev_lookup_data *data;
3305 1.1.1.11 mrg data = (struct gomp_splay_tree_rev_lookup_data *)d;
3306 1.1.1.11 mrg uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3307 1.1.1.10 mrg
3308 1.1.1.11 mrg if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3309 1.1.1.11 mrg return 0;
3310 1.1.1.10 mrg
3311 1.1.1.11 mrg size_t j;
3312 1.1.1.11 mrg for (j = 0; j < key->tgt->list_count; j++)
3313 1.1.1.11 mrg if (key->tgt->list[j].key == key)
3314 1.1.1.11 mrg break;
3315 1.1.1.11 mrg assert (j < key->tgt->list_count);
3316 1.1.1.11 mrg uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3317 1.1.1.11 mrg
3318 1.1.1.11 mrg if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3319 1.1.1.11 mrg || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3320 1.1 mrg {
3321 1.1.1.11 mrg data->key = key;
3322 1.1.1.11 mrg return 1;
3323 1.1 mrg }
3324 1.1.1.11 mrg return 0;
3325 1.1 mrg }
3326 1.1 mrg
3327 1.1.1.11 mrg static inline splay_tree_key
3328 1.1.1.11 mrg gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3329 1.1.1.11 mrg bool zero_len)
3330 1.1 mrg {
3331 1.1.1.11 mrg struct gomp_splay_tree_rev_lookup_data data;
3332 1.1.1.11 mrg data.key = NULL;
3333 1.1.1.11 mrg data.tgt_start = tgt_start;
3334 1.1.1.11 mrg data.tgt_end = tgt_end;
3335 1.1 mrg
3336 1.1.1.11 mrg if (tgt_start != tgt_end)
3337 1.1.1.11 mrg {
3338 1.1.1.11 mrg splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3339 1.1.1.11 mrg return data.key;
3340 1.1.1.11 mrg }
3341 1.1 mrg
3342 1.1.1.11 mrg data.tgt_end++;
3343 1.1.1.11 mrg splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3344 1.1.1.11 mrg if (data.key != NULL || zero_len)
3345 1.1.1.11 mrg return data.key;
3346 1.1.1.11 mrg data.tgt_end--;
3347 1.1.1.11 mrg
3348 1.1.1.11 mrg data.tgt_start--;
3349 1.1.1.11 mrg splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3350 1.1.1.11 mrg return data.key;
3351 1.1.1.2 mrg }
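
/* Illustrative note: for a zero-length lookup (TGT_START == TGT_END) the
   wrapper above first retries with the range widened one byte past the end,
   so a pointer into a mapping still matches; if that fails and ZERO_LEN is
   false, it retries one byte before the start, so a pointer just past the
   end of a mapping matches as well.  */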
3352 1.1.1.2 mrg
3353 1.1.1.11 mrg struct cpy_data
3354 1.1.1.2 mrg {
3355 1.1.1.11 mrg uint64_t devaddr;
3356 1.1.1.11 mrg bool present, aligned;
3357 1.1.1.11 mrg };
3358 1.1.1.2 mrg
3359 1.1.1.2 mrg
3360 1.1.1.11 mrg /* Search the just-mapped reverse-offload data; return the index if found,
3361 1.1.1.11 mrg otherwise a value >= N. */
3362 1.1.1.11 mrg
3363 1.1.1.11 mrg static inline int
3364 1.1.1.11 mrg gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3365 1.1.1.11 mrg unsigned short *kinds, uint64_t *sizes, size_t n,
3366 1.1.1.11 mrg uint64_t tgt_start, uint64_t tgt_end)
3367 1.1.1.11 mrg {
3368 1.1.1.11 mrg const bool short_mapkind = true;
3369 1.1.1.11 mrg const int typemask = short_mapkind ? 0xff : 0x7;
3370 1.1.1.11 mrg size_t i;
3371 1.1.1.11 mrg for (i = 0; i < n; i++)
3372 1.1.1.11 mrg {
3373 1.1.1.11 mrg bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3374 1.1.1.11 mrg == GOMP_MAP_STRUCT);
3375 1.1.1.11 mrg uint64_t dev_end;
3376 1.1.1.11 mrg if (!is_struct)
3377 1.1.1.11 mrg dev_end = d[i].devaddr + sizes[i];
3378 1.1.1.11 mrg else
3379 1.1.1.11 mrg {
3380 1.1.1.11 mrg if (i + sizes[i] < n)
3381 1.1.1.11 mrg dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3382 1.1.1.11 mrg else
3383 1.1.1.11 mrg dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3384 1.1.1.11 mrg }
3385 1.1.1.11 mrg if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3386 1.1.1.11 mrg || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3387 1.1.1.11 mrg break;
3388 1.1.1.11 mrg if (is_struct)
3389 1.1.1.11 mrg i += sizes[i];
3390 1.1.1.11 mrg }
3391 1.1.1.11 mrg return i;
3392 1.1.1.11 mrg }
3393 1.1.1.11 mrg
3394 1.1.1.11 mrg static inline int
3395 1.1.1.11 mrg gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3396 1.1.1.11 mrg unsigned short *kinds, uint64_t *sizes,
3397 1.1.1.11 mrg size_t n, uint64_t tgt_start, uint64_t tgt_end,
3398 1.1.1.11 mrg bool zero_len)
3399 1.1.1.11 mrg {
3400 1.1.1.11 mrg size_t i;
3401 1.1.1.11 mrg if (tgt_start != tgt_end)
3402 1.1.1.11 mrg return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3403 1.1.1.11 mrg tgt_start, tgt_end);
3404 1.1.1.11 mrg tgt_end++;
3405 1.1.1.11 mrg i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3406 1.1.1.11 mrg tgt_start, tgt_end);
3407 1.1.1.11 mrg if (i < n || zero_len)
3408 1.1.1.11 mrg return i;
3409 1.1.1.11 mrg tgt_end--;
3410 1.1.1.11 mrg
3411 1.1.1.11 mrg tgt_start--;
3412 1.1.1.11 mrg return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3413 1.1.1.11 mrg tgt_start, tgt_end);
3414 1.1.1.11 mrg }
3415 1.1.1.11 mrg
3416 1.1.1.11 mrg /* Handle reverse offload. This is called by the device plugins for a
3417 1.1.1.11 mrg reverse offload; it is not called if the outer target runs on the host.
3418 1.1.1.11 mrg The mapping is simplified: device-affecting constructs (except for target
3419 1.1.1.11 mrg with device(ancestor:1)) must not be encountered; in particular no
3420 1.1.1.11 mrg target (enter/exit) data. */
3421 1.1.1.11 mrg
3422 1.1.1.11 mrg void
3423 1.1.1.11 mrg gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3424 1.1.1.11 mrg uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3425 1.1.1.11 mrg struct goacc_asyncqueue *aq)
3426 1.1.1.11 mrg {
3427 1.1.1.11 mrg /* Return early if there is no offload code. */
3428 1.1.1.11 mrg if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3429 1.1.1.11 mrg return;
3430 1.1.1.11 mrg /* Currently, this fails because of calculate_firstprivate_requirements
3431 1.1.1.11 mrg below; it could be fixed, but additional code would need to be updated to
3432 1.1.1.11 mrg handle 32-bit hosts - thus, it is not worthwhile. */
3433 1.1.1.11 mrg if (sizeof (void *) != sizeof (uint64_t))
3434 1.1.1.11 mrg gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3435 1.1.1.11 mrg
3436 1.1.1.11 mrg struct cpy_data *cdata = NULL;
3437 1.1.1.11 mrg uint64_t *devaddrs;
3438 1.1.1.11 mrg uint64_t *sizes;
3439 1.1.1.11 mrg unsigned short *kinds;
3440 1.1.1.11 mrg const bool short_mapkind = true;
3441 1.1.1.11 mrg const int typemask = short_mapkind ? 0xff : 0x7;
3442 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3443 1.1.1.11 mrg
3444 1.1.1.11 mrg reverse_splay_tree_key n;
3445 1.1.1.11 mrg struct reverse_splay_tree_key_s k;
3446 1.1.1.11 mrg k.dev = fn_ptr;
3447 1.1.1.11 mrg
3448 1.1.1.11 mrg gomp_mutex_lock (&devicep->lock);
3449 1.1.1.11 mrg n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3450 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
3451 1.1.1.11 mrg
3452 1.1.1.11 mrg if (n == NULL)
3453 1.1.1.11 mrg gomp_fatal ("Cannot find reverse-offload function");
3454 1.1.1.11 mrg void (*host_fn) (void *) = (void (*) (void *)) n->k->host_start;
3455 1.1.1.11 mrg
3456 1.1.1.11 mrg if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3457 1.1.1.11 mrg {
3458 1.1.1.11 mrg devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3459 1.1.1.11 mrg sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3460 1.1.1.11 mrg kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3461 1.1.1.11 mrg }
3462 1.1.1.11 mrg else
3463 1.1.1.11 mrg {
3464 1.1.1.11 mrg devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3465 1.1.1.11 mrg sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3466 1.1.1.11 mrg kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3467 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, devaddrs,
3468 1.1.1.11 mrg (const void *) (uintptr_t) devaddrs_ptr,
3469 1.1.1.11 mrg mapnum * sizeof (uint64_t));
3470 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, sizes,
3471 1.1.1.11 mrg (const void *) (uintptr_t) sizes_ptr,
3472 1.1.1.11 mrg mapnum * sizeof (uint64_t));
3473 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, kinds,
3474 1.1.1.11 mrg (const void *) (uintptr_t) kinds_ptr,
3475 1.1.1.11 mrg mapnum * sizeof (unsigned short));
3476 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq))
3477 1.1.1.11 mrg exit (EXIT_FAILURE);
3478 1.1.1.11 mrg }
3479 1.1.1.11 mrg
3480 1.1.1.11 mrg size_t tgt_align = 0, tgt_size = 0;
3481 1.1.1.11 mrg
3482 1.1.1.11 mrg /* If actually executed on 32bit systems, the casts lead to wrong code;
3483 1.1.1.11 mrg but 32bit with offloading is not supported; see top of this function. */
3484 1.1.1.11 mrg calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3485 1.1.1.11 mrg (void *) (uintptr_t) kinds,
3486 1.1.1.11 mrg &tgt_align, &tgt_size);
3487 1.1.1.11 mrg
3488 1.1.1.11 mrg if (tgt_align)
3489 1.1.1.11 mrg {
3490 1.1.1.11 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3491 1.1.1.11 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3492 1.1.1.11 mrg if (al)
3493 1.1.1.11 mrg tgt += tgt_align - al;
3494 1.1.1.11 mrg tgt_size = 0;
3495 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++)
3496 1.1.1.11 mrg if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3497 1.1.1.11 mrg && devaddrs[i] != 0)
3498 1.1.1.11 mrg {
3499 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
3500 1.1.1.11 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
3501 1.1.1.11 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3502 1.1.1.11 mrg memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3503 1.1.1.11 mrg (size_t) sizes[i]);
3504 1.1.1.11 mrg else
3505 1.1.1.11 mrg {
3506 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3507 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i],
3508 1.1.1.11 mrg (size_t) sizes[i]);
3509 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq))
3510 1.1.1.11 mrg exit (EXIT_FAILURE);
3511 1.1.1.11 mrg }
3512 1.1.1.11 mrg devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3513 1.1.1.11 mrg tgt_size = tgt_size + sizes[i];
3514 1.1.1.11 mrg if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3515 1.1.1.11 mrg && i + 1 < mapnum
3516 1.1.1.11 mrg && ((get_kind (short_mapkind, kinds, i) & typemask)
3517 1.1.1.11 mrg == GOMP_MAP_ATTACH))
3518 1.1.1.11 mrg {
3519 1.1.1.11 mrg *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3520 1.1.1.11 mrg = (uint64_t) devaddrs[i];
3521 1.1.1.11 mrg ++i;
3522 1.1.1.11 mrg }
3523 1.1.1.11 mrg }
3524 1.1.1.11 mrg }
3525 1.1.1.11 mrg
3526 1.1.1.11 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3527 1.1.1.11 mrg {
3528 1.1.1.11 mrg size_t j, struct_cpy = 0;
3529 1.1.1.11 mrg splay_tree_key n2;
3530 1.1.1.11 mrg cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3531 1.1.1.11 mrg memset (cdata, '\0', sizeof (*cdata) * mapnum);
3532 1.1.1.11 mrg gomp_mutex_lock (&devicep->lock);
3533 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++)
3534 1.1.1.11 mrg {
3535 1.1.1.11 mrg if (devaddrs[i] == 0)
3536 1.1.1.11 mrg continue;
3537 1.1.1.11 mrg n = NULL;
3538 1.1.1.11 mrg int kind = get_kind (short_mapkind, kinds, i) & typemask;
3539 1.1.1.11 mrg switch (kind)
3540 1.1.1.11 mrg {
3541 1.1.1.11 mrg case GOMP_MAP_FIRSTPRIVATE:
3542 1.1.1.11 mrg case GOMP_MAP_FIRSTPRIVATE_INT:
3543 1.1.1.11 mrg continue;
3544 1.1.1.11 mrg
3545 1.1.1.11 mrg case GOMP_MAP_DELETE:
3546 1.1.1.11 mrg case GOMP_MAP_RELEASE:
3547 1.1.1.11 mrg case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3548 1.1.1.11 mrg /* Assume it is present; look it up - but ignore unless the
3549 1.1.1.11 mrg present clause is there. */
3550 1.1.1.11 mrg case GOMP_MAP_ALLOC:
3551 1.1.1.11 mrg case GOMP_MAP_FROM:
3552 1.1.1.11 mrg case GOMP_MAP_FORCE_ALLOC:
3553 1.1.1.11 mrg case GOMP_MAP_FORCE_FROM:
3554 1.1.1.11 mrg case GOMP_MAP_ALWAYS_FROM:
3555 1.1.1.11 mrg case GOMP_MAP_TO:
3556 1.1.1.11 mrg case GOMP_MAP_TOFROM:
3557 1.1.1.11 mrg case GOMP_MAP_FORCE_TO:
3558 1.1.1.11 mrg case GOMP_MAP_FORCE_TOFROM:
3559 1.1.1.11 mrg case GOMP_MAP_ALWAYS_TO:
3560 1.1.1.11 mrg case GOMP_MAP_ALWAYS_TOFROM:
3561 1.1.1.11 mrg case GOMP_MAP_FORCE_PRESENT:
3562 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_FROM:
3563 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TO:
3564 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3565 1.1.1.11 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3566 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i];
3567 1.1.1.11 mrg bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3568 1.1.1.11 mrg || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3569 1.1.1.11 mrg j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3570 1.1.1.11 mrg devaddrs[i],
3571 1.1.1.11 mrg devaddrs[i] + sizes[i], zero_len);
3572 1.1.1.11 mrg if (j < i)
3573 1.1.1.11 mrg {
3574 1.1.1.11 mrg n2 = NULL;
3575 1.1.1.11 mrg cdata[i].present = true;
3576 1.1.1.11 mrg devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3577 1.1.1.11 mrg }
3578 1.1.1.11 mrg else
3579 1.1.1.11 mrg {
3580 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map,
3581 1.1.1.11 mrg devaddrs[i],
3582 1.1.1.11 mrg devaddrs[i] + sizes[i], zero_len);
3583 1.1.1.11 mrg cdata[i].present = n2 != NULL;
3584 1.1.1.11 mrg }
3585 1.1.1.11 mrg if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3586 1.1.1.11 mrg {
3587 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
3588 1.1.1.11 mrg #ifdef HAVE_INTTYPES_H
3589 1.1.1.11 mrg gomp_fatal ("present clause: no corresponding data on "
3590 1.1.1.11 mrg "parent device at %p with size %"PRIu64,
3591 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i],
3592 1.1.1.11 mrg (uint64_t) sizes[i]);
3593 1.1.1.11 mrg #else
3594 1.1.1.11 mrg gomp_fatal ("present clause: no corresponding data on "
3595 1.1.1.11 mrg "parent device at %p with size %lu",
3596 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i],
3597 1.1.1.11 mrg (unsigned long) sizes[i]);
3598 1.1.1.11 mrg #endif
3599 1.1.1.11 mrg break;
3600 1.1.1.11 mrg }
3601 1.1.1.11 mrg else if (!cdata[i].present
3602 1.1.1.11 mrg && kind != GOMP_MAP_DELETE
3603 1.1.1.11 mrg && kind != GOMP_MAP_RELEASE
3604 1.1.1.11 mrg && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3605 1.1.1.11 mrg {
3606 1.1.1.11 mrg cdata[i].aligned = true;
3607 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
3608 1.1.1.11 mrg devaddrs[i]
3609 1.1.1.11 mrg = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3610 1.1.1.11 mrg sizes[i]);
3611 1.1.1.11 mrg }
3612 1.1.1.11 mrg else if (n2 != NULL)
3613 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr
3614 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset));
3615 1.1.1.11 mrg if (((!cdata[i].present || struct_cpy)
3616 1.1.1.11 mrg && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3617 1.1.1.11 mrg || kind == GOMP_MAP_FORCE_TO
3618 1.1.1.11 mrg || kind == GOMP_MAP_FORCE_TOFROM
3619 1.1.1.11 mrg || GOMP_MAP_ALWAYS_TO_P (kind))
3620 1.1.1.11 mrg {
3621 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq,
3622 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i],
3623 1.1.1.11 mrg (void *) (uintptr_t) cdata[i].devaddr,
3624 1.1.1.11 mrg sizes[i]);
3625 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq))
3626 1.1.1.11 mrg {
3627 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
3628 1.1.1.11 mrg exit (EXIT_FAILURE);
3629 1.1.1.11 mrg }
3630 1.1.1.11 mrg }
3631 1.1.1.11 mrg if (struct_cpy)
3632 1.1.1.11 mrg struct_cpy--;
3633 1.1.1.11 mrg break;
3634 1.1.1.11 mrg case GOMP_MAP_ATTACH:
3635 1.1.1.11 mrg case GOMP_MAP_POINTER:
3636 1.1.1.11 mrg case GOMP_MAP_ALWAYS_POINTER:
3637 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map,
3638 1.1.1.11 mrg devaddrs[i] + sizes[i],
3639 1.1.1.11 mrg devaddrs[i] + sizes[i]
3640 1.1.1.11 mrg + sizeof (void*), false);
3641 1.1.1.11 mrg cdata[i].present = n2 != NULL;
3642 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i];
3643 1.1.1.11 mrg if (n2)
3644 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr
3645 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset));
3646 1.1.1.11 mrg else
3647 1.1.1.11 mrg {
3648 1.1.1.11 mrg j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3649 1.1.1.11 mrg devaddrs[i] + sizes[i],
3650 1.1.1.11 mrg devaddrs[i] + sizes[i]
3651 1.1.1.11 mrg + sizeof (void*), false);
3652 1.1.1.11 mrg if (j < i)
3653 1.1.1.11 mrg {
3654 1.1.1.11 mrg cdata[i].present = true;
3655 1.1.1.11 mrg devaddrs[i] = (devaddrs[j] + devaddrs[i]
3656 1.1.1.11 mrg - cdata[j].devaddr);
3657 1.1.1.11 mrg }
3658 1.1.1.11 mrg }
3659 1.1.1.11 mrg if (!cdata[i].present)
3660 1.1.1.11 mrg devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3661 1.1.1.11 mrg /* Assume that when present, the pointer is already correct. */
3662 1.1.1.11 mrg if (!n2)
3663 1.1.1.11 mrg *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3664 1.1.1.11 mrg = devaddrs[i-1];
3665 1.1.1.11 mrg break;
3666 1.1.1.11 mrg case GOMP_MAP_TO_PSET:
3667 1.1.1.11 mrg /* Assume that when present, the pointers are fine and no 'to:'
3668 1.1.1.11 mrg is required. */
3669 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map,
3670 1.1.1.11 mrg devaddrs[i], devaddrs[i] + sizes[i],
3671 1.1.1.11 mrg false);
3672 1.1.1.11 mrg cdata[i].present = n2 != NULL;
3673 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i];
3674 1.1.1.11 mrg if (n2)
3675 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr
3676 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset));
3677 1.1.1.11 mrg else
3678 1.1.1.11 mrg {
3679 1.1.1.11 mrg j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3680 1.1.1.11 mrg devaddrs[i],
3681 1.1.1.11 mrg devaddrs[i] + sizes[i], false);
3682 1.1.1.11 mrg if (j < i)
3683 1.1.1.11 mrg {
3684 1.1.1.11 mrg cdata[i].present = true;
3685 1.1.1.11 mrg devaddrs[i] = (devaddrs[j] + devaddrs[i]
3686 1.1.1.11 mrg - cdata[j].devaddr);
3687 1.1.1.11 mrg }
3688 1.1.1.11 mrg }
3689 1.1.1.11 mrg if (!cdata[i].present)
3690 1.1.1.11 mrg {
3691 1.1.1.11 mrg cdata[i].aligned = true;
3692 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
3693 1.1.1.11 mrg devaddrs[i]
3694 1.1.1.11 mrg = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3695 1.1.1.11 mrg sizes[i]);
3696 1.1.1.11 mrg gomp_copy_dev2host (devicep, aq,
3697 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i],
3698 1.1.1.11 mrg (void *) (uintptr_t) cdata[i].devaddr,
3699 1.1.1.11 mrg sizes[i]);
3700 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq))
3701 1.1.1.11 mrg {
3702 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
3703 1.1.1.11 mrg exit (EXIT_FAILURE);
3704 1.1.1.11 mrg }
3705 1.1.1.11 mrg }
3706 1.1.1.11 mrg for (j = i + 1; j < mapnum; j++)
3707 1.1.1.11 mrg {
3708 1.1.1.11 mrg kind = get_kind (short_mapkind, kinds, j) & typemask;
3709 1.1.1.11 mrg if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3710 1.1.1.11 mrg && !GOMP_MAP_POINTER_P (kind))
3711 1.1.1.11 mrg break;
3712 1.1.1.11 mrg if (devaddrs[j] < devaddrs[i])
3713 1.1.1.11 mrg break;
3714 1.1.1.11 mrg if (cdata[i].present)
3715 1.1.1.11 mrg continue;
3716 1.1.1.11 mrg if (devaddrs[j] == 0)
3717 1.1.1.11 mrg {
3718 1.1.1.11 mrg *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3719 1.1.1.11 mrg continue;
3720 1.1.1.11 mrg }
3721 1.1.1.11 mrg int k;
3722 1.1.1.11 mrg n2 = NULL;
3723 1.1.1.11 mrg /* Dereference devaddrs[j] to get the device addr. */
3724 1.1.1.11 mrg assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3725 1.1.1.11 mrg devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3726 1.1.1.11 mrg + sizes[j]);
3727 1.1.1.11 mrg cdata[j].present = true;
3728 1.1.1.11 mrg cdata[j].devaddr = devaddrs[j];
3729 1.1.1.11 mrg if (devaddrs[j] == 0)
3730 1.1.1.11 mrg continue;
3731 1.1.1.11 mrg k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3732 1.1.1.11 mrg devaddrs[j],
3733 1.1.1.11 mrg devaddrs[j] + sizeof (void*),
3734 1.1.1.11 mrg false);
3735 1.1.1.11 mrg if (k < j)
3736 1.1.1.11 mrg devaddrs[j] = (devaddrs[k] + devaddrs[j]
3737 1.1.1.11 mrg - cdata[k].devaddr);
3738 1.1.1.11 mrg else
3739 1.1.1.11 mrg {
3740 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map,
3741 1.1.1.11 mrg devaddrs[j],
3742 1.1.1.11 mrg devaddrs[j] + sizeof (void*),
3743 1.1.1.11 mrg false);
3744 1.1.1.11 mrg if (n2 == NULL)
3745 1.1.1.11 mrg {
3746 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
3747 1.1.1.11 mrg gomp_fatal ("Pointer target wasn't mapped");
3748 1.1.1.11 mrg }
3749 1.1.1.11 mrg devaddrs[j] = (n2->host_start + cdata[j].devaddr
3750 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset));
3751 1.1.1.11 mrg }
3752 1.1.1.11 mrg *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3753 1.1.1.11 mrg = (void *) (uintptr_t) devaddrs[j];
3754 1.1.1.11 mrg }
3755 1.1.1.11 mrg i = j - 1;
3756 1.1.1.11 mrg break;
3757 1.1.1.11 mrg case GOMP_MAP_STRUCT:
3758 1.1.1.11 mrg n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3759 1.1.1.11 mrg devaddrs[i + sizes[i]]
3760 1.1.1.11 mrg + sizes[i + sizes[i]], false);
3761 1.1.1.11 mrg cdata[i].present = n2 != NULL;
3762 1.1.1.11 mrg cdata[i].devaddr = devaddrs[i];
3763 1.1.1.11 mrg struct_cpy = cdata[i].present ? 0 : sizes[i];
3764 1.1.1.11 mrg if (!n2)
3765 1.1.1.11 mrg {
3766 1.1.1.11 mrg size_t sz = (size_t) (devaddrs[i + sizes[i]]
3767 1.1.1.11 mrg - devaddrs[i+1]
3768 1.1.1.11 mrg + sizes[i + sizes[i]]);
3769 1.1.1.11 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
3770 1.1.1.11 mrg cdata[i].aligned = true;
3771 1.1.1.11 mrg devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3772 1.1.1.11 mrg devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3773 1.1.1.11 mrg }
3774 1.1.1.11 mrg else
3775 1.1.1.11 mrg devaddrs[i] = (n2->host_start + cdata[i].devaddr
3776 1.1.1.11 mrg - (n2->tgt->tgt_start + n2->tgt_offset));
3777 1.1.1.11 mrg break;
3778 1.1.1.11 mrg default:
3779 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
3780 1.1.1.11 mrg gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3781 1.1.1.11 mrg }
3782 1.1.1.11 mrg }
3783 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
3784 1.1.1.11 mrg }
3785 1.1.1.11 mrg
3786 1.1.1.11 mrg host_fn (devaddrs);
3787 1.1.1.11 mrg
3788 1.1.1.11 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3789 1.1.1.11 mrg {
3790 1.1.1.11 mrg uint64_t struct_cpy = 0;
3791 1.1.1.11 mrg bool clean_struct = false;
3792 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++)
3793 1.1.1.11 mrg {
3794 1.1.1.11 mrg if (cdata[i].devaddr == 0)
3795 1.1.1.11 mrg continue;
3796 1.1.1.11 mrg int kind = get_kind (short_mapkind, kinds, i) & typemask;
3797 1.1.1.11 mrg bool copy = !cdata[i].present || struct_cpy;
3798 1.1.1.11 mrg switch (kind)
3799 1.1.1.11 mrg {
3800 1.1.1.11 mrg case GOMP_MAP_FORCE_FROM:
3801 1.1.1.11 mrg case GOMP_MAP_FORCE_TOFROM:
3802 1.1.1.11 mrg case GOMP_MAP_ALWAYS_FROM:
3803 1.1.1.11 mrg case GOMP_MAP_ALWAYS_TOFROM:
3804 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_FROM:
3805 1.1.1.11 mrg case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3806 1.1.1.11 mrg copy = true;
3807 1.1.1.11 mrg /* FALLTHRU */
3808 1.1.1.11 mrg case GOMP_MAP_FROM:
3809 1.1.1.11 mrg case GOMP_MAP_TOFROM:
3810 1.1.1.11 mrg if (copy)
3811 1.1.1.11 mrg {
3812 1.1.1.11 mrg gomp_copy_host2dev (devicep, aq,
3813 1.1.1.11 mrg (void *) (uintptr_t) cdata[i].devaddr,
3814 1.1.1.11 mrg (void *) (uintptr_t) devaddrs[i],
3815 1.1.1.11 mrg sizes[i], false, NULL);
3816 1.1.1.11 mrg if (aq && !devicep->openacc.async.synchronize_func (aq))
3817 1.1.1.11 mrg exit (EXIT_FAILURE);
3818 1.1.1.11 mrg }
3819 1.1.1.11 mrg default:
3820 1.1.1.11 mrg break;
3821 1.1.1.11 mrg }
3822 1.1.1.11 mrg if (struct_cpy)
3823 1.1.1.11 mrg {
3824 1.1.1.11 mrg struct_cpy--;
3825 1.1.1.11 mrg continue;
3826 1.1.1.11 mrg }
3827 1.1.1.11 mrg if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3828 1.1.1.11 mrg {
3829 1.1.1.11 mrg clean_struct = true;
3830 1.1.1.11 mrg struct_cpy = sizes[i];
3831 1.1.1.11 mrg }
3832 1.1.1.11 mrg else if (!cdata[i].present && cdata[i].aligned)
3833 1.1.1.11 mrg gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3834 1.1.1.11 mrg else if (!cdata[i].present)
3835 1.1.1.11 mrg free ((void *) (uintptr_t) devaddrs[i]);
3836 1.1.1.11 mrg }
3837 1.1.1.11 mrg if (clean_struct)
3838 1.1.1.11 mrg for (uint64_t i = 0; i < mapnum; i++)
3839 1.1.1.11 mrg if (!cdata[i].present
3840 1.1.1.11 mrg && ((get_kind (short_mapkind, kinds, i) & typemask)
3841 1.1.1.11 mrg == GOMP_MAP_STRUCT))
3842 1.1.1.11 mrg {
3843 1.1.1.11 mrg devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3844 1.1.1.11 mrg gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3845 1.1.1.11 mrg }
3846 1.1.1.11 mrg
3847 1.1.1.11 mrg free (devaddrs);
3848 1.1.1.11 mrg free (sizes);
3849 1.1.1.11 mrg free (kinds);
3850 1.1.1.11 mrg }
3851 1.1.1.11 mrg }
3852 1.1.1.11 mrg
3853 1.1.1.11 mrg /* Host fallback for GOMP_target_data{,_ext} routines. */
3854 1.1.1.11 mrg
3855 1.1.1.11 mrg static void
3856 1.1.1.11 mrg gomp_target_data_fallback (struct gomp_device_descr *devicep)
3857 1.1.1.11 mrg {
3858 1.1.1.11 mrg struct gomp_task_icv *icv = gomp_icv (false);
3859 1.1.1.11 mrg
3860 1.1.1.11 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3861 1.1.1.11 mrg && devicep != NULL)
3862 1.1.1.11 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3863 1.1.1.11 mrg "be used for offloading");
3864 1.1.1.11 mrg
3865 1.1.1.11 mrg if (icv->target_data)
3866 1.1.1.11 mrg {
3867 1.1.1.11 mrg /* Even when doing a host fallback, if there are any active
3868 1.1.1.11 mrg 	 #pragma omp target data constructs, we need to remember the
3869 1.1.1.11 mrg new #pragma omp target data, otherwise GOMP_target_end_data
3870 1.1.1.11 mrg would get out of sync. */
3871 1.1.1.11 mrg struct target_mem_desc *tgt
3872 1.1.1.11 mrg = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3873 1.1.1.11 mrg NULL, GOMP_MAP_VARS_DATA);
3874 1.1.1.11 mrg tgt->prev = icv->target_data;
3875 1.1.1.11 mrg icv->target_data = tgt;
3876 1.1.1.11 mrg }
3877 1.1.1.11 mrg }
3878 1.1.1.11 mrg
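/* Handle #pragma omp target data: map the MAPNUM variables described by
   HOSTADDRS, SIZES and KINDS on DEVICE and push the new descriptor onto the
   current task's target_data chain, or use gomp_target_data_fallback when
   the device cannot be used for offloading.  GOMP_target_data_ext below is
   the same, but takes 16-bit map kinds.  */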
3879 1.1.1.11 mrg void
3880 1.1.1.11 mrg GOMP_target_data (int device, const void *unused, size_t mapnum,
3881 1.1.1.11 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
3882 1.1.1.11 mrg {
3883 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true);
3884 1.1.1.11 mrg
3885 1.1.1.11 mrg if (devicep == NULL
3886 1.1.1.11 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3887 1.1.1.11 mrg || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3888 1.1.1.11 mrg return gomp_target_data_fallback (devicep);
3889 1.1.1.11 mrg
3890 1.1.1.11 mrg struct target_mem_desc *tgt
3891 1.1.1.11 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3892 1.1.1.11 mrg NULL, GOMP_MAP_VARS_DATA);
3893 1.1.1.11 mrg struct gomp_task_icv *icv = gomp_icv (true);
3894 1.1.1.11 mrg tgt->prev = icv->target_data;
3895 1.1.1.11 mrg icv->target_data = tgt;
3896 1.1.1.11 mrg }
3897 1.1.1.11 mrg
3898 1.1.1.11 mrg void
3899 1.1.1.11 mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3900 1.1.1.11 mrg size_t *sizes, unsigned short *kinds)
3901 1.1.1.11 mrg {
3902 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true);
3903 1.1.1.11 mrg
3904 1.1.1.11 mrg if (devicep == NULL
3905 1.1.1.11 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3906 1.1.1.11 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3907 1.1.1.11 mrg return gomp_target_data_fallback (devicep);
3908 1.1.1.11 mrg
3909 1.1.1.11 mrg struct target_mem_desc *tgt
3910 1.1.1.11 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3911 1.1.1.11 mrg NULL, GOMP_MAP_VARS_DATA);
3912 1.1.1.11 mrg struct gomp_task_icv *icv = gomp_icv (true);
3913 1.1.1.11 mrg tgt->prev = icv->target_data;
3914 1.1 mrg icv->target_data = tgt;
3915 1.1 mrg }
3916 1.1 mrg
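/* Handle the end of a #pragma omp target data region: pop the most recent
   descriptor off the current task's target_data chain and unmap its
   variables.  */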
3917 1.1 mrg void
3918 1.1 mrg GOMP_target_end_data (void)
3919 1.1 mrg {
3920 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
3921 1.1 mrg if (icv->target_data)
3922 1.1 mrg {
3923 1.1 mrg struct target_mem_desc *tgt = icv->target_data;
3924 1.1 mrg icv->target_data = tgt->prev;
3925 1.1.1.10 mrg gomp_unmap_vars (tgt, true, NULL);
3926 1.1 mrg }
3927 1.1 mrg }
3928 1.1 mrg
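/* Handle #pragma omp target update with 8-bit map kinds: perform the
   requested to/from copies via gomp_update.  This is a no-op whenever the
   construct would fall back to the host or the device shares memory with
   the host.  */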
3929 1.1 mrg void
3930 1.1 mrg GOMP_target_update (int device, const void *unused, size_t mapnum,
3931 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
3932 1.1 mrg {
3933 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true);
3934 1.1 mrg
3935 1.1 mrg if (devicep == NULL
3936 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3937 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3938 1.1.1.2 mrg return;
3939 1.1.1.2 mrg
3940 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3941 1.1.1.2 mrg }
3942 1.1.1.2 mrg
3943 1.1.1.2 mrg void
3944 1.1.1.2 mrg GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3945 1.1.1.2 mrg size_t *sizes, unsigned short *kinds,
3946 1.1.1.2 mrg unsigned int flags, void **depend)
3947 1.1.1.2 mrg {
3948 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true);
3949 1.1.1.2 mrg
3950 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present,
3951 1.1.1.2 mrg block the parent task until the dependencies are resolved
3952 1.1.1.2 mrg and then just continue with the rest of the function as if it
3953 1.1.1.2 mrg      were a merged task.  Until we are able to schedule tasks during
3954 1.1.1.2 mrg variable mapping or unmapping, ignore nowait if depend clauses
3955 1.1.1.2 mrg are not present. */
3956 1.1.1.2 mrg if (depend != NULL)
3957 1.1.1.2 mrg {
3958 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
3959 1.1.1.2 mrg if (thr->task && thr->task->depend_hash)
3960 1.1.1.2 mrg {
3961 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3962 1.1.1.2 mrg && thr->ts.team
3963 1.1.1.2 mrg && !thr->task->final_task)
3964 1.1.1.2 mrg {
3965 1.1.1.2 mrg if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3966 1.1.1.2 mrg mapnum, hostaddrs, sizes, kinds,
3967 1.1.1.2 mrg flags | GOMP_TARGET_FLAG_UPDATE,
3968 1.1.1.2 mrg depend, NULL, GOMP_TARGET_TASK_DATA))
3969 1.1.1.2 mrg return;
3970 1.1.1.2 mrg }
3971 1.1.1.2 mrg else
3972 1.1.1.2 mrg {
3973 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
3974 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new
3975 1.1.1.2 mrg tasks. */
3976 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
3977 1.1.1.7 mrg {
3978 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
3979 1.1.1.7 mrg return;
3980 1.1.1.7 mrg if (thr->task->taskgroup)
3981 1.1.1.7 mrg {
3982 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
3983 1.1.1.7 mrg return;
3984 1.1.1.7 mrg if (thr->task->taskgroup->workshare
3985 1.1.1.7 mrg && thr->task->taskgroup->prev
3986 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
3987 1.1.1.7 mrg return;
3988 1.1.1.7 mrg }
3989 1.1.1.7 mrg }
3990 1.1.1.2 mrg
3991 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend);
3992 1.1.1.2 mrg }
3993 1.1.1.2 mrg }
3994 1.1.1.2 mrg }
3995 1.1.1.2 mrg
3996 1.1.1.2 mrg if (devicep == NULL
3997 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3998 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3999 1.1.1.2 mrg return;
4000 1.1.1.2 mrg
4001 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
4002 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
4003 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4004 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
4005 1.1.1.7 mrg {
4006 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
4007 1.1.1.7 mrg return;
4008 1.1.1.7 mrg if (thr->task->taskgroup)
4009 1.1.1.7 mrg {
4010 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
4011 1.1.1.7 mrg return;
4012 1.1.1.7 mrg if (thr->task->taskgroup->workshare
4013 1.1.1.7 mrg && thr->task->taskgroup->prev
4014 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
4015 1.1.1.7 mrg return;
4016 1.1.1.7 mrg }
4017 1.1.1.7 mrg }
4018 1.1 mrg
4019 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
4020 1.1.1.2 mrg }
4021 1.1.1.2 mrg
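/* Unmapping counterpart of GOMP_target_enter_exit_data: detach any pointers
   first, then decrement reference counts, copy back from/always-from list
   items, and finally remove the variables whose reference count dropped to
   zero.  Called with the device lock not held.  */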
4022 1.1.1.2 mrg static void
4023 1.1.1.2 mrg gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
4024 1.1.1.10 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
4025 1.1.1.10 mrg htab_t *refcount_set)
4026 1.1.1.2 mrg {
4027 1.1.1.2 mrg const int typemask = 0xff;
4028 1.1.1.2 mrg size_t i;
4029 1.1 mrg gomp_mutex_lock (&devicep->lock);
4030 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
4031 1.1.1.2 mrg {
4032 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
4033 1.1.1.2 mrg return;
4034 1.1.1.2 mrg }
4035 1.1.1.2 mrg
4036 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
4037 1.1.1.10 mrg if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
4038 1.1.1.10 mrg {
4039 1.1.1.10 mrg struct splay_tree_key_s cur_node;
4040 1.1.1.10 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
4041 1.1.1.10 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
4042 1.1.1.10 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
4043 1.1.1.10 mrg
4044 1.1.1.10 mrg if (n)
4045 1.1.1.10 mrg gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
4046 1.1.1.10 mrg false, NULL);
4047 1.1.1.10 mrg }
4048 1.1.1.10 mrg
4049 1.1.1.10 mrg int nrmvars = 0;
4050 1.1.1.10 mrg splay_tree_key remove_vars[mapnum];
4051 1.1.1.10 mrg
4052 1.1.1.10 mrg for (i = 0; i < mapnum; i++)
4053 1.1.1.2 mrg {
4054 1.1.1.2 mrg struct splay_tree_key_s cur_node;
4055 1.1.1.2 mrg unsigned char kind = kinds[i] & typemask;
4056 1.1.1.2 mrg switch (kind)
4057 1.1.1.2 mrg {
4058 1.1.1.2 mrg case GOMP_MAP_FROM:
4059 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
4060 1.1.1.2 mrg case GOMP_MAP_DELETE:
4061 1.1.1.2 mrg case GOMP_MAP_RELEASE:
4062 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4063 1.1.1.2 mrg case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4064 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
4065 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizes[i];
4066 1.1.1.2 mrg splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4067 1.1.1.2 mrg || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
4068 1.1.1.2 mrg ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
4069 1.1.1.2 mrg : splay_tree_lookup (&devicep->mem_map, &cur_node);
4070 1.1.1.2 mrg if (!k)
4071 1.1.1.2 mrg continue;
4072 1.1.1.2 mrg
4073 1.1.1.10 mrg bool delete_p = (kind == GOMP_MAP_DELETE
4074 1.1.1.10 mrg || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4075 1.1.1.10 mrg bool do_copy, do_remove;
4076 1.1.1.10 mrg gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4077 1.1.1.10 mrg &do_remove);
4078 1.1.1.2 mrg
4079 1.1.1.10 mrg if ((kind == GOMP_MAP_FROM && do_copy)
4080 1.1.1.2 mrg || kind == GOMP_MAP_ALWAYS_FROM)
4081 1.1.1.10 mrg {
4082 1.1.1.10 mrg if (k->aux && k->aux->attach_count)
4083 1.1.1.10 mrg {
4084 1.1.1.10 mrg /* We have to be careful not to overwrite still attached
4085 1.1.1.10 mrg pointers during the copyback to host. */
4086 1.1.1.10 mrg uintptr_t addr = k->host_start;
4087 1.1.1.10 mrg while (addr < k->host_end)
4088 1.1.1.10 mrg {
4089 1.1.1.10 mrg size_t i = (addr - k->host_start) / sizeof (void *);
4090 1.1.1.10 mrg if (k->aux->attach_count[i] == 0)
4091 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, (void *) addr,
4092 1.1.1.10 mrg (void *) (k->tgt->tgt_start
4093 1.1.1.10 mrg + k->tgt_offset
4094 1.1.1.10 mrg + addr - k->host_start),
4095 1.1.1.10 mrg sizeof (void *));
4096 1.1.1.10 mrg addr += sizeof (void *);
4097 1.1.1.10 mrg }
4098 1.1.1.10 mrg }
4099 1.1.1.10 mrg else
4100 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4101 1.1.1.10 mrg (void *) (k->tgt->tgt_start + k->tgt_offset
4102 1.1.1.10 mrg + cur_node.host_start
4103 1.1.1.10 mrg - k->host_start),
4104 1.1.1.10 mrg cur_node.host_end - cur_node.host_start);
4105 1.1.1.10 mrg }
4106 1.1.1.10 mrg
4107 1.1.1.10 mrg 	  /* Structure element lists are removed altogether at once, which
4108 1.1.1.10 mrg may cause immediate deallocation of the target_mem_desc, causing
4109 1.1.1.10 mrg errors if we still have following element siblings to copy back.
4110 1.1.1.10 mrg While we're at it, it also seems more disciplined to simply
4111 1.1.1.10 mrg queue all removals together for processing below.
4112 1.1.1.10 mrg
4113 1.1.1.10 mrg Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4114 1.1.1.10 mrg 	     not have this problem, since it maintains an additional
4115 1.1.1.10 mrg tgt->refcount = 1 reference to the target_mem_desc to start with.
4116 1.1.1.10 mrg */
4117 1.1.1.10 mrg if (do_remove)
4118 1.1.1.10 mrg remove_vars[nrmvars++] = k;
4119 1.1.1.10 mrg break;
4120 1.1.1.2 mrg
4121 1.1.1.10 mrg case GOMP_MAP_DETACH:
4122 1.1.1.2 mrg break;
4123 1.1.1.2 mrg default:
4124 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
4125 1.1.1.2 mrg gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4126 1.1.1.2 mrg kind);
4127 1.1.1.2 mrg }
4128 1.1.1.2 mrg }
4129 1.1.1.2 mrg
4130 1.1.1.10 mrg for (int i = 0; i < nrmvars; i++)
4131 1.1.1.10 mrg gomp_remove_var (devicep, remove_vars[i]);
4132 1.1.1.10 mrg
4133 1.1 mrg gomp_mutex_unlock (&devicep->lock);
4134 1.1.1.2 mrg }
4135 1.1 mrg
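/* Handle #pragma omp target enter data and #pragma omp target exit data;
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS distinguishes the two.  Depend
   clauses may turn the work into a deferred target task; otherwise the
   variables are mapped one by one (enter data) or unmapped through
   gomp_exit_data (exit data).  */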
4136 1.1.1.2 mrg void
4137 1.1.1.2 mrg GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4138 1.1.1.2 mrg size_t *sizes, unsigned short *kinds,
4139 1.1.1.2 mrg unsigned int flags, void **depend)
4140 1.1.1.2 mrg {
4141 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device, true);
4142 1.1.1.2 mrg
4143 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present,
4144 1.1.1.2 mrg block the parent task until the dependencies are resolved
4145 1.1.1.2 mrg and then just continue with the rest of the function as if it
4146 1.1.1.2 mrg      were a merged task.  Until we are able to schedule tasks during
4147 1.1.1.2 mrg variable mapping or unmapping, ignore nowait if depend clauses
4148 1.1.1.2 mrg are not present. */
4149 1.1.1.2 mrg if (depend != NULL)
4150 1.1.1.2 mrg {
4151 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
4152 1.1.1.2 mrg if (thr->task && thr->task->depend_hash)
4153 1.1.1.2 mrg {
4154 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4155 1.1.1.2 mrg && thr->ts.team
4156 1.1.1.2 mrg && !thr->task->final_task)
4157 1.1.1.2 mrg {
4158 1.1.1.2 mrg if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4159 1.1.1.2 mrg mapnum, hostaddrs, sizes, kinds,
4160 1.1.1.2 mrg flags, depend, NULL,
4161 1.1.1.2 mrg GOMP_TARGET_TASK_DATA))
4162 1.1.1.2 mrg return;
4163 1.1.1.2 mrg }
4164 1.1.1.2 mrg else
4165 1.1.1.2 mrg {
4166 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
4167 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new
4168 1.1.1.2 mrg tasks. */
4169 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
4170 1.1.1.7 mrg {
4171 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
4172 1.1.1.7 mrg return;
4173 1.1.1.7 mrg if (thr->task->taskgroup)
4174 1.1.1.7 mrg {
4175 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
4176 1.1.1.7 mrg return;
4177 1.1.1.7 mrg if (thr->task->taskgroup->workshare
4178 1.1.1.7 mrg && thr->task->taskgroup->prev
4179 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
4180 1.1.1.7 mrg return;
4181 1.1.1.7 mrg }
4182 1.1.1.7 mrg }
4183 1.1.1.2 mrg
4184 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend);
4185 1.1.1.2 mrg }
4186 1.1.1.2 mrg }
4187 1.1.1.2 mrg }
4188 1.1.1.2 mrg
4189 1.1.1.2 mrg if (devicep == NULL
4190 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4191 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4192 1.1.1.2 mrg return;
4193 1.1.1.2 mrg
4194 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
4195 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
4196 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4197 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
4198 1.1.1.7 mrg {
4199 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
4200 1.1.1.7 mrg return;
4201 1.1.1.7 mrg if (thr->task->taskgroup)
4202 1.1.1.7 mrg {
4203 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
4204 1.1.1.7 mrg return;
4205 1.1.1.7 mrg if (thr->task->taskgroup->workshare
4206 1.1.1.7 mrg && thr->task->taskgroup->prev
4207 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
4208 1.1.1.7 mrg return;
4209 1.1.1.7 mrg }
4210 1.1.1.7 mrg }
4211 1.1.1.2 mrg
4212 1.1.1.10 mrg htab_t refcount_set = htab_create (mapnum);
4213 1.1.1.10 mrg
4214 1.1.1.8 mrg /* The variables are mapped separately such that they can be released
4215 1.1.1.8 mrg independently. */
4216 1.1.1.8 mrg size_t i, j;
4217 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4218 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
4219 1.1.1.11 mrg if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT
4220 1.1.1.11 mrg || (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4221 1.1.1.2 mrg {
4222 1.1.1.2 mrg gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4223 1.1.1.10 mrg &kinds[i], true, &refcount_set,
4224 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA);
4225 1.1.1.2 mrg i += sizes[i];
4226 1.1.1.2 mrg }
4227 1.1.1.8 mrg else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4228 1.1.1.8 mrg {
4229 1.1.1.8 mrg for (j = i + 1; j < mapnum; j++)
4230 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4231 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4232 1.1.1.8 mrg break;
4233 1.1.1.8 mrg gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4234 1.1.1.10 mrg &kinds[i], true, &refcount_set,
4235 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA);
4236 1.1.1.8 mrg i += j - i - 1;
4237 1.1.1.8 mrg }
4238 1.1.1.11 mrg else if (i + 1 < mapnum
4239 1.1.1.11 mrg && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4240 1.1.1.11 mrg || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4241 1.1.1.11 mrg && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4242 1.1.1.10 mrg {
4243 1.1.1.10 mrg /* An attach operation must be processed together with the mapped
4244 1.1.1.10 mrg base-pointer list item. */
4245 1.1.1.10 mrg gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4246 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4247 1.1.1.10 mrg i += 1;
4248 1.1.1.10 mrg }
4249 1.1.1.2 mrg else
4250 1.1.1.2 mrg gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4251 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4252 1.1.1.2 mrg else
4253 1.1.1.10 mrg gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4254 1.1.1.10 mrg htab_free (refcount_set);
4255 1.1.1.2 mrg }
4256 1.1.1.2 mrg
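/* Task function used for deferred target, target update and target
   enter/exit data constructs.  Returns true if the work has been handed to
   the plugin's async_run hook and will complete asynchronously, false if
   everything was done synchronously.  */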
4257 1.1.1.2 mrg bool
4258 1.1.1.2 mrg gomp_target_task_fn (void *data)
4259 1.1.1.2 mrg {
4260 1.1.1.2 mrg struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4261 1.1.1.2 mrg struct gomp_device_descr *devicep = ttask->devicep;
4262 1.1.1.2 mrg
4263 1.1.1.2 mrg if (ttask->fn != NULL)
4264 1.1.1.2 mrg {
4265 1.1.1.2 mrg void *fn_addr;
4266 1.1.1.2 mrg if (devicep == NULL
4267 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4268 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4269 1.1.1.2 mrg || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4270 1.1.1.2 mrg {
4271 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_FALLBACK;
4272 1.1.1.10 mrg gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4273 1.1.1.10 mrg ttask->args);
4274 1.1.1.2 mrg return false;
4275 1.1.1.2 mrg }
4276 1.1.1.2 mrg
4277 1.1.1.2 mrg if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4278 1.1.1.2 mrg {
4279 1.1.1.2 mrg if (ttask->tgt)
4280 1.1.1.10 mrg gomp_unmap_vars (ttask->tgt, true, NULL);
4281 1.1.1.2 mrg return false;
4282 1.1.1.2 mrg }
4283 1.1.1.2 mrg
4284 1.1.1.2 mrg void *actual_arguments;
4285 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4286 1.1.1.2 mrg {
4287 1.1.1.2 mrg ttask->tgt = NULL;
4288 1.1.1.2 mrg actual_arguments = ttask->hostaddrs;
4289 1.1.1.2 mrg }
4290 1.1.1.2 mrg else
4291 1.1.1.2 mrg {
4292 1.1.1.2 mrg ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4293 1.1.1.2 mrg NULL, ttask->sizes, ttask->kinds, true,
4294 1.1.1.10 mrg NULL, GOMP_MAP_VARS_TARGET);
4295 1.1.1.2 mrg actual_arguments = (void *) ttask->tgt->tgt_start;
4296 1.1.1.2 mrg }
4297 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4298 1.1.1.2 mrg
4299 1.1.1.8 mrg assert (devicep->async_run_func);
4300 1.1.1.2 mrg devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4301 1.1.1.2 mrg ttask->args, (void *) ttask);
4302 1.1.1.2 mrg return true;
4303 1.1.1.2 mrg }
4304 1.1.1.2 mrg else if (devicep == NULL
4305 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4306 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4307 1.1.1.2 mrg return false;
4308 1.1.1.2 mrg
4309 1.1.1.2 mrg size_t i;
4310 1.1.1.2 mrg if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4311 1.1.1.2 mrg gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4312 1.1.1.2 mrg ttask->kinds, true);
4313 1.1.1.2 mrg else
4314 1.1.1.10 mrg {
4315 1.1.1.10 mrg htab_t refcount_set = htab_create (ttask->mapnum);
4316 1.1.1.10 mrg if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4317 1.1.1.10 mrg for (i = 0; i < ttask->mapnum; i++)
4318 1.1.1.11 mrg if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT
4319 1.1.1.11 mrg || (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
4320 1.1.1.10 mrg {
4321 1.1.1.10 mrg gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4322 1.1.1.10 mrg NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4323 1.1.1.10 mrg &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4324 1.1.1.10 mrg i += ttask->sizes[i];
4325 1.1.1.10 mrg }
4326 1.1.1.10 mrg else
4327 1.1.1.10 mrg gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4328 1.1.1.10 mrg &ttask->kinds[i], true, &refcount_set,
4329 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA);
4330 1.1.1.10 mrg else
4331 1.1.1.10 mrg gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4332 1.1.1.10 mrg ttask->kinds, &refcount_set);
4333 1.1.1.10 mrg htab_free (refcount_set);
4334 1.1.1.10 mrg }
4335 1.1.1.2 mrg return false;
4336 1.1 mrg }
4337 1.1 mrg
4338 1.1 mrg void
4339 1.1 mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4340 1.1 mrg {
4341 1.1 mrg if (thread_limit)
4342 1.1 mrg {
4343 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
4344 1.1 mrg icv->thread_limit_var
4345 1.1 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4346 1.1 mrg }
4347 1.1 mrg (void) num_teams;
4348 1.1 mrg }
4349 1.1 mrg
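/* Handle the teams construct: on the FIRST call record the thread_limit
   ICV and the requested number of teams, then step through the team
   numbers on subsequent calls, returning false once all teams have been
   executed.  */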
4350 1.1.1.10 mrg bool
4351 1.1.1.10 mrg GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4352 1.1.1.10 mrg unsigned int thread_limit, bool first)
4353 1.1.1.10 mrg {
4354 1.1.1.10 mrg struct gomp_thread *thr = gomp_thread ();
4355 1.1.1.10 mrg if (first)
4356 1.1.1.10 mrg {
4357 1.1.1.10 mrg if (thread_limit)
4358 1.1.1.10 mrg {
4359 1.1.1.10 mrg struct gomp_task_icv *icv = gomp_icv (true);
4360 1.1.1.10 mrg icv->thread_limit_var
4361 1.1.1.10 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4362 1.1.1.10 mrg }
4363 1.1.1.10 mrg (void) num_teams_high;
4364 1.1.1.10 mrg if (num_teams_low == 0)
4365 1.1.1.10 mrg num_teams_low = 1;
4366 1.1.1.10 mrg thr->num_teams = num_teams_low - 1;
4367 1.1.1.10 mrg thr->team_num = 0;
4368 1.1.1.10 mrg }
4369 1.1.1.10 mrg else if (thr->team_num == thr->num_teams)
4370 1.1.1.10 mrg return false;
4371 1.1.1.10 mrg else
4372 1.1.1.10 mrg ++thr->team_num;
4373 1.1.1.10 mrg return true;
4374 1.1.1.10 mrg }
4375 1.1.1.10 mrg
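/* omp_target_alloc -- allocate SIZE bytes of device memory on device
   DEVICE_NUM.  The host, the initial device and shared-memory devices are
   served by plain malloc; NULL is returned on failure.  */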
4376 1.1.1.2 mrg void *
4377 1.1.1.2 mrg omp_target_alloc (size_t size, int device_num)
4378 1.1.1.2 mrg {
4379 1.1.1.11 mrg if (device_num == omp_initial_device
4380 1.1.1.11 mrg || device_num == gomp_get_num_devices ())
4381 1.1.1.2 mrg return malloc (size);
4382 1.1.1.2 mrg
4383 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
4384 1.1.1.2 mrg if (devicep == NULL)
4385 1.1.1.2 mrg return NULL;
4386 1.1.1.2 mrg
4387 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4388 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4389 1.1.1.2 mrg return malloc (size);
4390 1.1.1.2 mrg
4391 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
4392 1.1.1.2 mrg void *ret = devicep->alloc_func (devicep->target_id, size);
4393 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
4394 1.1.1.2 mrg return ret;
4395 1.1.1.2 mrg }
4396 1.1.1.2 mrg
4397 1.1.1.2 mrg void
4398 1.1.1.2 mrg omp_target_free (void *device_ptr, int device_num)
4399 1.1.1.2 mrg {
4400 1.1.1.11 mrg if (device_num == omp_initial_device
4401 1.1.1.11 mrg || device_num == gomp_get_num_devices ())
4402 1.1.1.2 mrg {
4403 1.1.1.2 mrg free (device_ptr);
4404 1.1.1.2 mrg return;
4405 1.1.1.2 mrg }
4406 1.1.1.2 mrg
4407 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
4408 1.1.1.11 mrg if (devicep == NULL || device_ptr == NULL)
4409 1.1.1.2 mrg return;
4410 1.1.1.2 mrg
4411 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4412 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4413 1.1.1.2 mrg {
4414 1.1.1.2 mrg free (device_ptr);
4415 1.1.1.2 mrg return;
4416 1.1.1.2 mrg }
4417 1.1.1.2 mrg
4418 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
4419 1.1.1.3 mrg gomp_free_device_memory (devicep, device_ptr);
4420 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
4421 1.1.1.2 mrg }
4422 1.1.1.2 mrg
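/* omp_target_is_present -- return nonzero if PTR refers to storage that is
   already mapped on (or directly accessible from) device DEVICE_NUM.  */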
4423 1.1.1.2 mrg int
4424 1.1.1.7 mrg omp_target_is_present (const void *ptr, int device_num)
4425 1.1.1.2 mrg {
4426 1.1.1.11 mrg if (device_num == omp_initial_device
4427 1.1.1.11 mrg || device_num == gomp_get_num_devices ())
4428 1.1.1.2 mrg return 1;
4429 1.1.1.2 mrg
4430 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
4431 1.1.1.2 mrg if (devicep == NULL)
4432 1.1.1.2 mrg return 0;
4433 1.1.1.2 mrg
4434 1.1.1.11 mrg if (ptr == NULL)
4435 1.1.1.11 mrg return 1;
4436 1.1.1.11 mrg
4437 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4438 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4439 1.1.1.2 mrg return 1;
4440 1.1.1.2 mrg
4441 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
4442 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
4443 1.1.1.2 mrg struct splay_tree_key_s cur_node;
4444 1.1.1.2 mrg
4445 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
4446 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
4447 1.1.1.2 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4448 1.1.1.2 mrg int ret = n != NULL;
4449 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
4450 1.1.1.2 mrg return ret;
4451 1.1.1.2 mrg }
4452 1.1.1.2 mrg
4453 1.1.1.11 mrg static int
4454 1.1.1.11 mrg omp_target_memcpy_check (int dst_device_num, int src_device_num,
4455 1.1.1.11 mrg struct gomp_device_descr **dst_devicep,
4456 1.1.1.11 mrg struct gomp_device_descr **src_devicep)
4457 1.1.1.11 mrg {
4458 1.1.1.11 mrg if (dst_device_num != gomp_get_num_devices ()
4459 1.1.1.11 mrg       /* The call to gomp_get_num_devices above has to be made unconditionally.  */
4460 1.1.1.11 mrg && dst_device_num != omp_initial_device)
4461 1.1.1.2 mrg {
4462 1.1.1.11 mrg *dst_devicep = resolve_device (dst_device_num, false);
4463 1.1.1.11 mrg if (*dst_devicep == NULL)
4464 1.1.1.2 mrg return EINVAL;
4465 1.1.1.2 mrg
4466 1.1.1.11 mrg if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4467 1.1.1.11 mrg || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4468 1.1.1.11 mrg *dst_devicep = NULL;
4469 1.1.1.2 mrg }
4470 1.1.1.2 mrg
4471 1.1.1.11 mrg if (src_device_num != num_devices_openmp
4472 1.1.1.11 mrg && src_device_num != omp_initial_device)
4473 1.1.1.11 mrg {
4474 1.1.1.11 mrg *src_devicep = resolve_device (src_device_num, false);
4475 1.1.1.11 mrg if (*src_devicep == NULL)
4476 1.1.1.2 mrg return EINVAL;
4477 1.1.1.2 mrg
4478 1.1.1.11 mrg if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4479 1.1.1.11 mrg || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4480 1.1.1.11 mrg *src_devicep = NULL;
4481 1.1.1.2 mrg }
4482 1.1.1.11 mrg
4483 1.1.1.11 mrg return 0;
4484 1.1.1.11 mrg }
4485 1.1.1.11 mrg
4486 1.1.1.11 mrg static int
4487 1.1.1.11 mrg omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4488 1.1.1.11 mrg size_t dst_offset, size_t src_offset,
4489 1.1.1.11 mrg struct gomp_device_descr *dst_devicep,
4490 1.1.1.11 mrg struct gomp_device_descr *src_devicep)
4491 1.1.1.11 mrg {
4492 1.1.1.11 mrg bool ret;
4493 1.1.1.2 mrg if (src_devicep == NULL && dst_devicep == NULL)
4494 1.1.1.2 mrg {
4495 1.1.1.2 mrg memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4496 1.1.1.2 mrg return 0;
4497 1.1.1.2 mrg }
4498 1.1.1.2 mrg if (src_devicep == NULL)
4499 1.1.1.2 mrg {
4500 1.1.1.2 mrg gomp_mutex_lock (&dst_devicep->lock);
4501 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4502 1.1.1.3 mrg (char *) dst + dst_offset,
4503 1.1.1.3 mrg (char *) src + src_offset, length);
4504 1.1.1.2 mrg gomp_mutex_unlock (&dst_devicep->lock);
4505 1.1.1.3 mrg return (ret ? 0 : EINVAL);
4506 1.1.1.2 mrg }
4507 1.1.1.2 mrg if (dst_devicep == NULL)
4508 1.1.1.2 mrg {
4509 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
4510 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id,
4511 1.1.1.3 mrg (char *) dst + dst_offset,
4512 1.1.1.3 mrg (char *) src + src_offset, length);
4513 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
4514 1.1.1.3 mrg return (ret ? 0 : EINVAL);
4515 1.1.1.2 mrg }
4516 1.1.1.2 mrg if (src_devicep == dst_devicep)
4517 1.1.1.2 mrg {
4518 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
4519 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id,
4520 1.1.1.3 mrg (char *) dst + dst_offset,
4521 1.1.1.3 mrg (char *) src + src_offset, length);
4522 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
4523 1.1.1.3 mrg return (ret ? 0 : EINVAL);
4524 1.1.1.2 mrg }
4525 1.1.1.2 mrg return EINVAL;
4526 1.1.1.2 mrg }
4527 1.1.1.2 mrg
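/* omp_target_memcpy -- copy LENGTH bytes from SRC + SRC_OFFSET on device
   SRC_DEVICE_NUM to DST + DST_OFFSET on device DST_DEVICE_NUM.  Returns 0
   on success and EINVAL otherwise.

   A minimal usage sketch (illustrative only, not part of the original
   sources; host_buf is a hypothetical int[100] array on the host):

     int dev = omp_get_default_device ();
     int *dp = (int *) omp_target_alloc (100 * sizeof (int), dev);
     if (dp != NULL
	 && omp_target_memcpy (dp, host_buf, 100 * sizeof (int),
			       0, 0, dev, omp_get_initial_device ()) == 0)
       {
	 ... operate on dp, e.g. in a target region with is_device_ptr (dp) ...
       }
     omp_target_free (dp, dev);  */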
4528 1.1.1.11 mrg int
4529 1.1.1.11 mrg omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4530 1.1.1.11 mrg size_t src_offset, int dst_device_num, int src_device_num)
4531 1.1.1.11 mrg {
4532 1.1.1.11 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4533 1.1.1.11 mrg int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4534 1.1.1.11 mrg &dst_devicep, &src_devicep);
4535 1.1.1.11 mrg
4536 1.1.1.11 mrg if (ret)
4537 1.1.1.11 mrg return ret;
4538 1.1.1.11 mrg
4539 1.1.1.11 mrg ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4540 1.1.1.11 mrg dst_devicep, src_devicep);
4541 1.1.1.11 mrg
4542 1.1.1.11 mrg return ret;
4543 1.1.1.11 mrg }
4544 1.1.1.11 mrg
4545 1.1.1.11 mrg typedef struct
4546 1.1.1.11 mrg {
4547 1.1.1.11 mrg void *dst;
4548 1.1.1.11 mrg const void *src;
4549 1.1.1.11 mrg size_t length;
4550 1.1.1.11 mrg size_t dst_offset;
4551 1.1.1.11 mrg size_t src_offset;
4552 1.1.1.11 mrg struct gomp_device_descr *dst_devicep;
4553 1.1.1.11 mrg struct gomp_device_descr *src_devicep;
4554 1.1.1.11 mrg } omp_target_memcpy_data;
4555 1.1.1.11 mrg
4556 1.1.1.11 mrg static void
4557 1.1.1.11 mrg omp_target_memcpy_async_helper (void *args)
4558 1.1.1.11 mrg {
4559 1.1.1.11 mrg omp_target_memcpy_data *a = args;
4560 1.1.1.11 mrg if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4561 1.1.1.11 mrg a->src_offset, a->dst_devicep, a->src_devicep))
4562 1.1.1.11 mrg gomp_fatal ("omp_target_memcpy failed");
4563 1.1.1.11 mrg }
4564 1.1.1.11 mrg
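/* omp_target_memcpy_async -- like omp_target_memcpy, but the copy runs in a
   separate OpenMP task that may depend on the DEPOBJ_COUNT depend objects
   in DEPOBJ_LIST.  Only argument-checking errors are reported through the
   return value; a failing copy aborts via gomp_fatal in the helper.  */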
4565 1.1.1.11 mrg int
4566 1.1.1.11 mrg omp_target_memcpy_async (void *dst, const void *src, size_t length,
4567 1.1.1.11 mrg size_t dst_offset, size_t src_offset,
4568 1.1.1.11 mrg int dst_device_num, int src_device_num,
4569 1.1.1.11 mrg int depobj_count, omp_depend_t *depobj_list)
4570 1.1.1.11 mrg {
4571 1.1.1.11 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4572 1.1.1.11 mrg unsigned int flags = 0;
4573 1.1.1.11 mrg void *depend[depobj_count + 5];
4574 1.1.1.11 mrg int i;
4575 1.1.1.11 mrg int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4576 1.1.1.11 mrg &dst_devicep, &src_devicep);
4577 1.1.1.11 mrg
4578 1.1.1.11 mrg omp_target_memcpy_data s = {
4579 1.1.1.11 mrg .dst = dst,
4580 1.1.1.11 mrg .src = src,
4581 1.1.1.11 mrg .length = length,
4582 1.1.1.11 mrg .dst_offset = dst_offset,
4583 1.1.1.11 mrg .src_offset = src_offset,
4584 1.1.1.11 mrg .dst_devicep = dst_devicep,
4585 1.1.1.11 mrg .src_devicep = src_devicep
4586 1.1.1.11 mrg };
4587 1.1.1.11 mrg
4588 1.1.1.11 mrg if (check)
4589 1.1.1.11 mrg return check;
4590 1.1.1.11 mrg
4591 1.1.1.11 mrg if (depobj_count > 0 && depobj_list != NULL)
4592 1.1.1.11 mrg {
4593 1.1.1.11 mrg flags |= GOMP_TASK_FLAG_DEPEND;
4594 1.1.1.11 mrg depend[0] = 0;
4595 1.1.1.11 mrg depend[1] = (void *) (uintptr_t) depobj_count;
4596 1.1.1.11 mrg depend[2] = depend[3] = depend[4] = 0;
4597 1.1.1.11 mrg for (i = 0; i < depobj_count; ++i)
4598 1.1.1.11 mrg depend[i + 5] = &depobj_list[i];
4599 1.1.1.11 mrg }
4600 1.1.1.11 mrg
4601 1.1.1.11 mrg GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4602 1.1.1.11 mrg __alignof__ (s), true, flags, depend, 0, NULL);
4603 1.1.1.11 mrg
4604 1.1.1.11 mrg return 0;
4605 1.1.1.11 mrg }
4606 1.1.1.11 mrg
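/* Recursive worker for omp_target_memcpy_rect{,_async}: copy a
   NUM_DIMS-dimensional rectangular subvolume, peeling off one dimension per
   recursion step.  2-D and 3-D copies use the plugin's memcpy2d/memcpy3d
   hooks when available, and copies between two distinct devices are staged
   through the host bounce buffer *TMP (of *TMP_SIZE bytes), which the
   caller must free.  */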
4607 1.1.1.2 mrg static int
4608 1.1.1.7 mrg omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4609 1.1.1.2 mrg int num_dims, const size_t *volume,
4610 1.1.1.2 mrg const size_t *dst_offsets,
4611 1.1.1.2 mrg const size_t *src_offsets,
4612 1.1.1.2 mrg const size_t *dst_dimensions,
4613 1.1.1.2 mrg const size_t *src_dimensions,
4614 1.1.1.2 mrg struct gomp_device_descr *dst_devicep,
4615 1.1.1.11 mrg struct gomp_device_descr *src_devicep,
4616 1.1.1.11 mrg size_t *tmp_size, void **tmp)
4617 1.1.1.2 mrg {
4618 1.1.1.2 mrg size_t dst_slice = element_size;
4619 1.1.1.2 mrg size_t src_slice = element_size;
4620 1.1.1.2 mrg size_t j, dst_off, src_off, length;
4621 1.1.1.2 mrg int i, ret;
4622 1.1.1.2 mrg
4623 1.1.1.2 mrg if (num_dims == 1)
4624 1.1.1.2 mrg {
4625 1.1.1.2 mrg if (__builtin_mul_overflow (element_size, volume[0], &length)
4626 1.1.1.2 mrg || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4627 1.1.1.2 mrg || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4628 1.1.1.2 mrg return EINVAL;
4629 1.1.1.2 mrg if (dst_devicep == NULL && src_devicep == NULL)
4630 1.1.1.3 mrg {
4631 1.1.1.7 mrg memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4632 1.1.1.7 mrg length);
4633 1.1.1.3 mrg ret = 1;
4634 1.1.1.3 mrg }
4635 1.1.1.2 mrg else if (src_devicep == NULL)
4636 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4637 1.1.1.3 mrg (char *) dst + dst_off,
4638 1.1.1.7 mrg (const char *) src + src_off,
4639 1.1.1.7 mrg length);
4640 1.1.1.2 mrg else if (dst_devicep == NULL)
4641 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id,
4642 1.1.1.3 mrg (char *) dst + dst_off,
4643 1.1.1.7 mrg (const char *) src + src_off,
4644 1.1.1.7 mrg length);
4645 1.1.1.2 mrg else if (src_devicep == dst_devicep)
4646 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id,
4647 1.1.1.3 mrg (char *) dst + dst_off,
4648 1.1.1.7 mrg (const char *) src + src_off,
4649 1.1.1.7 mrg length);
4650 1.1.1.2 mrg else
4651 1.1.1.11 mrg {
4652 1.1.1.11 mrg if (*tmp_size == 0)
4653 1.1.1.11 mrg {
4654 1.1.1.11 mrg *tmp_size = length;
4655 1.1.1.11 mrg *tmp = malloc (length);
4656 1.1.1.11 mrg if (*tmp == NULL)
4657 1.1.1.11 mrg return ENOMEM;
4658 1.1.1.11 mrg }
4659 1.1.1.11 mrg else if (*tmp_size < length)
4660 1.1.1.11 mrg {
4661 1.1.1.11 mrg *tmp_size = length;
4662 1.1.1.11 mrg free (*tmp);
4663 1.1.1.11 mrg *tmp = malloc (length);
4664 1.1.1.11 mrg if (*tmp == NULL)
4665 1.1.1.11 mrg return ENOMEM;
4666 1.1.1.11 mrg }
4667 1.1.1.11 mrg ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4668 1.1.1.11 mrg (const char *) src + src_off,
4669 1.1.1.11 mrg length);
4670 1.1.1.11 mrg if (ret == 1)
4671 1.1.1.11 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4672 1.1.1.11 mrg (char *) dst + dst_off, *tmp,
4673 1.1.1.11 mrg length);
4674 1.1.1.11 mrg }
4675 1.1.1.3 mrg return ret ? 0 : EINVAL;
4676 1.1.1.2 mrg }
4677 1.1.1.2 mrg
4678 1.1.1.11 mrg   /* Fast paths for 2-D and 3-D copies (host->device, device->host and intra-device) using the plugin's memcpy2d/memcpy3d hooks when available.  */
4679 1.1.1.11 mrg if (num_dims == 2
4680 1.1.1.11 mrg && ((src_devicep
4681 1.1.1.11 mrg && src_devicep == dst_devicep
4682 1.1.1.11 mrg && src_devicep->memcpy2d_func)
4683 1.1.1.11 mrg || (!src_devicep != !dst_devicep
4684 1.1.1.11 mrg && ((src_devicep && src_devicep->memcpy2d_func)
4685 1.1.1.11 mrg || (dst_devicep && dst_devicep->memcpy2d_func)))))
4686 1.1.1.11 mrg {
4687 1.1.1.11 mrg size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4688 1.1.1.11 mrg int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4689 1.1.1.11 mrg int src_id = src_devicep ? src_devicep->target_id : -1;
4690 1.1.1.11 mrg struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4691 1.1.1.11 mrg
4692 1.1.1.11 mrg if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4693 1.1.1.11 mrg || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4694 1.1.1.11 mrg || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4695 1.1.1.11 mrg || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4696 1.1.1.11 mrg || __builtin_mul_overflow (src_offsets[1], element_size,
4697 1.1.1.11 mrg &src_off_sz1))
4698 1.1.1.11 mrg return EINVAL;
4699 1.1.1.11 mrg ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4700 1.1.1.11 mrg dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4701 1.1.1.11 mrg src, src_off_sz1, src_offsets[0], src_sz1);
4702 1.1.1.11 mrg if (ret != -1)
4703 1.1.1.11 mrg return ret ? 0 : EINVAL;
4704 1.1.1.11 mrg }
4705 1.1.1.11 mrg else if (num_dims == 3
4706 1.1.1.11 mrg && ((src_devicep
4707 1.1.1.11 mrg && src_devicep == dst_devicep
4708 1.1.1.11 mrg && src_devicep->memcpy3d_func)
4709 1.1.1.11 mrg || (!src_devicep != !dst_devicep
4710 1.1.1.11 mrg && ((src_devicep && src_devicep->memcpy3d_func)
4711 1.1.1.11 mrg || (dst_devicep && dst_devicep->memcpy3d_func)))))
4712 1.1.1.11 mrg {
4713 1.1.1.11 mrg size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4714 1.1.1.11 mrg int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4715 1.1.1.11 mrg int src_id = src_devicep ? src_devicep->target_id : -1;
4716 1.1.1.11 mrg struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4717 1.1.1.11 mrg
4718 1.1.1.11 mrg if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4719 1.1.1.11 mrg || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4720 1.1.1.11 mrg || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4721 1.1.1.11 mrg || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4722 1.1.1.11 mrg || __builtin_mul_overflow (src_offsets[2], element_size,
4723 1.1.1.11 mrg &src_off_sz2))
4724 1.1.1.11 mrg return EINVAL;
4725 1.1.1.11 mrg ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4726 1.1.1.11 mrg dst, dst_off_sz2, dst_offsets[1],
4727 1.1.1.11 mrg dst_offsets[0], dst_sz2, dst_dimensions[1],
4728 1.1.1.11 mrg src, src_off_sz2, src_offsets[1],
4729 1.1.1.11 mrg src_offsets[0], src_sz2, src_dimensions[1]);
4730 1.1.1.11 mrg if (ret != -1)
4731 1.1.1.11 mrg return ret ? 0 : EINVAL;
4732 1.1.1.11 mrg }
4733 1.1.1.2 mrg
4734 1.1.1.2 mrg for (i = 1; i < num_dims; i++)
4735 1.1.1.2 mrg if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4736 1.1.1.2 mrg || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4737 1.1.1.2 mrg return EINVAL;
4738 1.1.1.2 mrg if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4739 1.1.1.2 mrg || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4740 1.1.1.2 mrg return EINVAL;
4741 1.1.1.2 mrg for (j = 0; j < volume[0]; j++)
4742 1.1.1.2 mrg {
4743 1.1.1.2 mrg ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4744 1.1.1.7 mrg (const char *) src + src_off,
4745 1.1.1.2 mrg element_size, num_dims - 1,
4746 1.1.1.2 mrg volume + 1, dst_offsets + 1,
4747 1.1.1.2 mrg src_offsets + 1, dst_dimensions + 1,
4748 1.1.1.2 mrg src_dimensions + 1, dst_devicep,
4749 1.1.1.11 mrg src_devicep, tmp_size, tmp);
4750 1.1.1.2 mrg if (ret)
4751 1.1.1.2 mrg return ret;
4752 1.1.1.2 mrg dst_off += dst_slice;
4753 1.1.1.2 mrg src_off += src_slice;
4754 1.1.1.2 mrg }
4755 1.1.1.2 mrg return 0;
4756 1.1.1.2 mrg }
4757 1.1.1.2 mrg
4758 1.1.1.11 mrg static int
4759 1.1.1.11 mrg omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4760 1.1.1.11 mrg int src_device_num,
4761 1.1.1.11 mrg struct gomp_device_descr **dst_devicep,
4762 1.1.1.11 mrg struct gomp_device_descr **src_devicep)
4763 1.1.1.11 mrg {
4764 1.1.1.11 mrg if (!dst && !src)
4765 1.1.1.11 mrg return INT_MAX;
4766 1.1.1.11 mrg
4767 1.1.1.11 mrg int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4768 1.1.1.11 mrg dst_devicep, src_devicep);
4769 1.1.1.11 mrg if (ret)
4770 1.1.1.11 mrg return ret;
4771 1.1.1.11 mrg
4772 1.1.1.11 mrg return 0;
4773 1.1.1.11 mrg }
4774 1.1.1.11 mrg
4775 1.1.1.11 mrg static int
4776 1.1.1.11 mrg omp_target_memcpy_rect_copy (void *dst, const void *src,
4777 1.1.1.11 mrg size_t element_size, int num_dims,
4778 1.1.1.11 mrg const size_t *volume, const size_t *dst_offsets,
4779 1.1.1.11 mrg const size_t *src_offsets,
4780 1.1.1.11 mrg const size_t *dst_dimensions,
4781 1.1.1.11 mrg const size_t *src_dimensions,
4782 1.1.1.11 mrg struct gomp_device_descr *dst_devicep,
4783 1.1.1.11 mrg struct gomp_device_descr *src_devicep)
4784 1.1.1.11 mrg {
4785 1.1.1.11 mrg size_t tmp_size = 0;
4786 1.1.1.11 mrg void *tmp = NULL;
4787 1.1.1.11 mrg bool lock_src;
4788 1.1.1.11 mrg bool lock_dst;
4789 1.1.1.11 mrg
4790 1.1.1.11 mrg lock_src = src_devicep != NULL;
4791 1.1.1.11 mrg lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
4792 1.1.1.11 mrg if (lock_src)
4793 1.1.1.11 mrg gomp_mutex_lock (&src_devicep->lock);
4794 1.1.1.11 mrg if (lock_dst)
4795 1.1.1.11 mrg gomp_mutex_lock (&dst_devicep->lock);
4796 1.1.1.11 mrg int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4797 1.1.1.11 mrg volume, dst_offsets, src_offsets,
4798 1.1.1.11 mrg dst_dimensions, src_dimensions,
4799 1.1.1.11 mrg dst_devicep, src_devicep,
4800 1.1.1.11 mrg &tmp_size, &tmp);
4801 1.1.1.11 mrg if (lock_src)
4802 1.1.1.11 mrg gomp_mutex_unlock (&src_devicep->lock);
4803 1.1.1.11 mrg if (lock_dst)
4804 1.1.1.11 mrg gomp_mutex_unlock (&dst_devicep->lock);
4805 1.1.1.11 mrg if (tmp)
4806 1.1.1.11 mrg free (tmp);
4807 1.1.1.11 mrg
4808 1.1.1.11 mrg return ret;
4809 1.1.1.11 mrg }
4810 1.1.1.11 mrg
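/* omp_target_memcpy_rect -- copy a NUM_DIMS-dimensional rectangular
   subvolume of ELEMENT_SIZE-sized elements from SRC on device
   SRC_DEVICE_NUM to DST on device DST_DEVICE_NUM.  Returns 0 on success,
   EINVAL on error, and INT_MAX (the number of dimensions supported) when
   both DST and SRC are NULL.  */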
4811 1.1.1.2 mrg int
4812 1.1.1.7 mrg omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4813 1.1.1.2 mrg int num_dims, const size_t *volume,
4814 1.1.1.2 mrg const size_t *dst_offsets,
4815 1.1.1.2 mrg const size_t *src_offsets,
4816 1.1.1.2 mrg const size_t *dst_dimensions,
4817 1.1.1.2 mrg const size_t *src_dimensions,
4818 1.1.1.2 mrg int dst_device_num, int src_device_num)
4819 1.1.1.2 mrg {
4820 1.1.1.2 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4821 1.1.1.2 mrg
4822 1.1.1.11 mrg int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4823 1.1.1.11 mrg src_device_num, &dst_devicep,
4824 1.1.1.11 mrg &src_devicep);
4825 1.1.1.11 mrg
4826 1.1.1.11 mrg if (check)
4827 1.1.1.11 mrg return check;
4828 1.1.1.11 mrg
4829 1.1.1.11 mrg int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4830 1.1.1.11 mrg volume, dst_offsets, src_offsets,
4831 1.1.1.11 mrg dst_dimensions, src_dimensions,
4832 1.1.1.11 mrg dst_devicep, src_devicep);
4833 1.1.1.2 mrg
4834 1.1.1.11 mrg return ret;
4835 1.1.1.11 mrg }
4836 1.1.1.2 mrg
4837 1.1.1.11 mrg typedef struct
4838 1.1.1.11 mrg {
4839 1.1.1.11 mrg void *dst;
4840 1.1.1.11 mrg const void *src;
4841 1.1.1.11 mrg size_t element_size;
4842 1.1.1.11 mrg const size_t *volume;
4843 1.1.1.11 mrg const size_t *dst_offsets;
4844 1.1.1.11 mrg const size_t *src_offsets;
4845 1.1.1.11 mrg const size_t *dst_dimensions;
4846 1.1.1.11 mrg const size_t *src_dimensions;
4847 1.1.1.11 mrg struct gomp_device_descr *dst_devicep;
4848 1.1.1.11 mrg struct gomp_device_descr *src_devicep;
4849 1.1.1.11 mrg int num_dims;
4850 1.1.1.11 mrg } omp_target_memcpy_rect_data;
4851 1.1.1.2 mrg
4852 1.1.1.11 mrg static void
4853 1.1.1.11 mrg omp_target_memcpy_rect_async_helper (void *args)
4854 1.1.1.11 mrg {
4855 1.1.1.11 mrg omp_target_memcpy_rect_data *a = args;
4856 1.1.1.11 mrg int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4857 1.1.1.11 mrg a->num_dims, a->volume, a->dst_offsets,
4858 1.1.1.11 mrg a->src_offsets, a->dst_dimensions,
4859 1.1.1.11 mrg a->src_dimensions, a->dst_devicep,
4860 1.1.1.11 mrg a->src_devicep);
4861 1.1.1.11 mrg if (ret)
4862 1.1.1.11 mrg gomp_fatal ("omp_target_memcpy_rect failed");
4863 1.1.1.11 mrg }
4864 1.1.1.2 mrg
4865 1.1.1.11 mrg int
4866 1.1.1.11 mrg omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4867 1.1.1.11 mrg int num_dims, const size_t *volume,
4868 1.1.1.11 mrg const size_t *dst_offsets,
4869 1.1.1.11 mrg const size_t *src_offsets,
4870 1.1.1.11 mrg const size_t *dst_dimensions,
4871 1.1.1.11 mrg const size_t *src_dimensions,
4872 1.1.1.11 mrg int dst_device_num, int src_device_num,
4873 1.1.1.11 mrg int depobj_count, omp_depend_t *depobj_list)
4874 1.1.1.11 mrg {
4875 1.1.1.11 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4876 1.1.1.11 mrg unsigned flags = 0;
4877 1.1.1.11 mrg int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4878 1.1.1.11 mrg src_device_num, &dst_devicep,
4879 1.1.1.11 mrg &src_devicep);
4880 1.1.1.11 mrg void *depend[depobj_count + 5];
4881 1.1.1.11 mrg int i;
4882 1.1.1.2 mrg
4883 1.1.1.11 mrg omp_target_memcpy_rect_data s = {
4884 1.1.1.11 mrg .dst = dst,
4885 1.1.1.11 mrg .src = src,
4886 1.1.1.11 mrg .element_size = element_size,
4887 1.1.1.11 mrg .num_dims = num_dims,
4888 1.1.1.11 mrg .volume = volume,
4889 1.1.1.11 mrg .dst_offsets = dst_offsets,
4890 1.1.1.11 mrg .src_offsets = src_offsets,
4891 1.1.1.11 mrg .dst_dimensions = dst_dimensions,
4892 1.1.1.11 mrg .src_dimensions = src_dimensions,
4893 1.1.1.11 mrg .dst_devicep = dst_devicep,
4894 1.1.1.11 mrg .src_devicep = src_devicep
4895 1.1.1.11 mrg };
4896 1.1.1.11 mrg
4897 1.1.1.11 mrg if (check)
4898 1.1.1.11 mrg return check;
4899 1.1.1.11 mrg
4900 1.1.1.11 mrg if (depobj_count > 0 && depobj_list != NULL)
4901 1.1.1.11 mrg {
4902 1.1.1.11 mrg flags |= GOMP_TASK_FLAG_DEPEND;
4903 1.1.1.11 mrg depend[0] = 0;
4904 1.1.1.11 mrg depend[1] = (void *) (uintptr_t) depobj_count;
4905 1.1.1.11 mrg depend[2] = depend[3] = depend[4] = 0;
4906 1.1.1.11 mrg for (i = 0; i < depobj_count; ++i)
4907 1.1.1.11 mrg depend[i + 5] = &depobj_list[i];
4908 1.1.1.2 mrg }
4909 1.1.1.2 mrg
4910 1.1.1.11 mrg GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4911 1.1.1.11 mrg __alignof__ (s), true, flags, depend, 0, NULL);
4912 1.1.1.2 mrg
4913 1.1.1.11 mrg return 0;
4914 1.1.1.2 mrg }
4915 1.1.1.2 mrg
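/* omp_target_associate_ptr -- record that [HOST_PTR, HOST_PTR + SIZE)
   corresponds to DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM by
   inserting a mapping with infinite reference count.  Returns 0 on success
   and EINVAL otherwise, including when a conflicting mapping already
   exists.  */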
4916 1.1.1.2 mrg int
4917 1.1.1.7 mrg omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4918 1.1.1.7 mrg size_t size, size_t device_offset, int device_num)
4919 1.1.1.2 mrg {
4920 1.1.1.11 mrg if (device_num == omp_initial_device
4921 1.1.1.11 mrg || device_num == gomp_get_num_devices ())
4922 1.1.1.2 mrg return EINVAL;
4923 1.1.1.2 mrg
4924 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
4925 1.1.1.2 mrg if (devicep == NULL)
4926 1.1.1.2 mrg return EINVAL;
4927 1.1.1.2 mrg
4928 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4929 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4930 1.1.1.2 mrg return EINVAL;
4931 1.1.1.2 mrg
4932 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
4933 1.1.1.2 mrg
4934 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
4935 1.1.1.2 mrg struct splay_tree_key_s cur_node;
4936 1.1.1.2 mrg int ret = EINVAL;
4937 1.1.1.2 mrg
4938 1.1.1.2 mrg cur_node.host_start = (uintptr_t) host_ptr;
4939 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + size;
4940 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4941 1.1.1.2 mrg if (n)
4942 1.1.1.2 mrg {
4943 1.1.1.2 mrg if (n->tgt->tgt_start + n->tgt_offset
4944 1.1.1.2 mrg == (uintptr_t) device_ptr + device_offset
4945 1.1.1.2 mrg && n->host_start <= cur_node.host_start
4946 1.1.1.2 mrg && n->host_end >= cur_node.host_end)
4947 1.1.1.2 mrg ret = 0;
4948 1.1.1.2 mrg }
4949 1.1.1.2 mrg else
4950 1.1.1.2 mrg {
4951 1.1.1.2 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4952 1.1.1.2 mrg tgt->array = gomp_malloc (sizeof (*tgt->array));
4953 1.1.1.2 mrg tgt->refcount = 1;
4954 1.1.1.2 mrg tgt->tgt_start = 0;
4955 1.1.1.2 mrg tgt->tgt_end = 0;
4956 1.1.1.2 mrg tgt->to_free = NULL;
4957 1.1.1.2 mrg tgt->prev = NULL;
4958 1.1.1.2 mrg tgt->list_count = 0;
4959 1.1.1.2 mrg tgt->device_descr = devicep;
4960 1.1.1.2 mrg splay_tree_node array = tgt->array;
4961 1.1.1.2 mrg splay_tree_key k = &array->key;
4962 1.1.1.2 mrg k->host_start = cur_node.host_start;
4963 1.1.1.2 mrg k->host_end = cur_node.host_end;
4964 1.1.1.2 mrg k->tgt = tgt;
4965 1.1.1.2 mrg k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4966 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
4967 1.1.1.7 mrg k->dynamic_refcount = 0;
4968 1.1.1.8 mrg k->aux = NULL;
4969 1.1.1.2 mrg array->left = NULL;
4970 1.1.1.2 mrg array->right = NULL;
4971 1.1.1.2 mrg splay_tree_insert (&devicep->mem_map, array);
4972 1.1.1.2 mrg ret = 0;
4973 1.1.1.2 mrg }
4974 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
4975 1.1.1.2 mrg return ret;
4976 1.1.1.2 mrg }
4977 1.1.1.2 mrg
4978 1.1.1.2 mrg int
4979 1.1.1.7 mrg omp_target_disassociate_ptr (const void *ptr, int device_num)
4980 1.1.1.2 mrg {
4981 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
4982 1.1.1.2 mrg if (devicep == NULL)
4983 1.1.1.2 mrg return EINVAL;
4984 1.1.1.2 mrg
4985 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4986 1.1.1.2 mrg return EINVAL;
4987 1.1.1.2 mrg
4988 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
4989 1.1.1.2 mrg
4990 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
4991 1.1.1.2 mrg struct splay_tree_key_s cur_node;
4992 1.1.1.2 mrg int ret = EINVAL;
4993 1.1.1.2 mrg
4994 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
4995 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
4996 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4997 1.1.1.2 mrg if (n
4998 1.1.1.2 mrg && n->host_start == cur_node.host_start
4999 1.1.1.2 mrg && n->refcount == REFCOUNT_INFINITY
5000 1.1.1.2 mrg && n->tgt->tgt_start == 0
5001 1.1.1.2 mrg && n->tgt->to_free == NULL
5002 1.1.1.2 mrg && n->tgt->refcount == 1
5003 1.1.1.2 mrg && n->tgt->list_count == 0)
5004 1.1.1.2 mrg {
5005 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, n);
5006 1.1.1.2 mrg gomp_unmap_tgt (n->tgt);
5007 1.1.1.2 mrg ret = 0;
5008 1.1.1.2 mrg }
5009 1.1.1.2 mrg
5010 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
5011 1.1.1.2 mrg return ret;
5012 1.1.1.2 mrg }
5013 1.1.1.2 mrg
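/* omp_get_mapped_ptr -- return the device pointer that PTR is mapped to on
   device DEVICE_NUM, PTR itself for the host and shared-memory devices, or
   NULL if there is no mapping.  */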
5014 1.1.1.11 mrg void *
5015 1.1.1.11 mrg omp_get_mapped_ptr (const void *ptr, int device_num)
5016 1.1.1.11 mrg {
5017 1.1.1.11 mrg if (device_num == omp_initial_device
5018 1.1.1.11 mrg || device_num == omp_get_initial_device ())
5019 1.1.1.11 mrg return (void *) ptr;
5020 1.1.1.11 mrg
5021 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
5022 1.1.1.11 mrg if (devicep == NULL)
5023 1.1.1.11 mrg return NULL;
5024 1.1.1.11 mrg
5025 1.1.1.11 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5026 1.1.1.11 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5027 1.1.1.11 mrg return (void *) ptr;
5028 1.1.1.11 mrg
5029 1.1.1.11 mrg gomp_mutex_lock (&devicep->lock);
5030 1.1.1.11 mrg
5031 1.1.1.11 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
5032 1.1.1.11 mrg struct splay_tree_key_s cur_node;
5033 1.1.1.11 mrg void *ret = NULL;
5034 1.1.1.11 mrg
5035 1.1.1.11 mrg cur_node.host_start = (uintptr_t) ptr;
5036 1.1.1.11 mrg cur_node.host_end = cur_node.host_start;
5037 1.1.1.11 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
5038 1.1.1.11 mrg
5039 1.1.1.11 mrg if (n)
5040 1.1.1.11 mrg {
5041 1.1.1.11 mrg uintptr_t offset = cur_node.host_start - n->host_start;
5042 1.1.1.11 mrg ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
5043 1.1.1.11 mrg }
5044 1.1.1.11 mrg
5045 1.1.1.11 mrg gomp_mutex_unlock (&devicep->lock);
5046 1.1.1.11 mrg
5047 1.1.1.11 mrg return ret;
5048 1.1.1.11 mrg }
5049 1.1.1.11 mrg
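/* omp_target_is_accessible -- return whether the SIZE bytes at PTR are
   accessible from device DEVICE_NUM.  For now this is only the case for
   the host and for shared-memory devices.  */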
5050 1.1.1.11 mrg int
5051 1.1.1.11 mrg omp_target_is_accessible (const void *ptr, size_t size, int device_num)
5052 1.1.1.11 mrg {
5053 1.1.1.11 mrg if (device_num == omp_initial_device
5054 1.1.1.11 mrg || device_num == gomp_get_num_devices ())
5055 1.1.1.11 mrg return true;
5056 1.1.1.11 mrg
5057 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
5058 1.1.1.11 mrg if (devicep == NULL)
5059 1.1.1.11 mrg return false;
5060 1.1.1.11 mrg
5061 1.1.1.11 mrg /* TODO: Unified shared memory must be handled when available. */
5062 1.1.1.11 mrg
5063 1.1.1.11 mrg return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
5064 1.1.1.11 mrg }
5065 1.1.1.11 mrg
5066 1.1.1.7 mrg int
5067 1.1.1.7 mrg omp_pause_resource (omp_pause_resource_t kind, int device_num)
5068 1.1.1.7 mrg {
5069 1.1.1.7 mrg (void) kind;
5070 1.1.1.11 mrg if (device_num == omp_initial_device
5071 1.1.1.11 mrg || device_num == gomp_get_num_devices ())
5072 1.1.1.7 mrg return gomp_pause_host ();
5073 1.1.1.11 mrg
5074 1.1.1.11 mrg struct gomp_device_descr *devicep = resolve_device (device_num, false);
5075 1.1.1.11 mrg if (devicep == NULL)
5076 1.1.1.7 mrg return -1;
5077 1.1.1.11 mrg
5078 1.1.1.7 mrg /* Do nothing for target devices for now. */
5079 1.1.1.7 mrg return 0;
5080 1.1.1.7 mrg }
5081 1.1.1.7 mrg
5082 1.1.1.7 mrg int
5083 1.1.1.7 mrg omp_pause_resource_all (omp_pause_resource_t kind)
5084 1.1.1.7 mrg {
5085 1.1.1.7 mrg (void) kind;
5086 1.1.1.7 mrg if (gomp_pause_host ())
5087 1.1.1.7 mrg return -1;
5088 1.1.1.7 mrg /* Do nothing for target devices for now. */
5089 1.1.1.7 mrg return 0;
5090 1.1.1.7 mrg }
5091 1.1.1.7 mrg
5092 1.1.1.7 mrg ialias (omp_pause_resource)
5093 1.1.1.7 mrg ialias (omp_pause_resource_all)
5094 1.1.1.7 mrg
5095 1.1 mrg #ifdef PLUGIN_SUPPORT
5096 1.1 mrg
5097 1.1 mrg /* This function tries to load a plugin for DEVICE.  The name of the plugin is passed
5098 1.1 mrg in PLUGIN_NAME.
5099 1.1 mrg The handles of the found functions are stored in the corresponding fields
5100 1.1 mrg of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5101 1.1 mrg
5102 1.1 mrg static bool
5103 1.1 mrg gomp_load_plugin_for_device (struct gomp_device_descr *device,
5104 1.1 mrg const char *plugin_name)
5105 1.1 mrg {
5106 1.1 mrg const char *err = NULL, *last_missing = NULL;
5107 1.1 mrg
5108 1.1 mrg void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5109 1.1 mrg if (!plugin_handle)
5110 1.1.1.10 mrg #if OFFLOAD_DEFAULTED
5111 1.1.1.10 mrg return 0;
5112 1.1.1.10 mrg #else
5113 1.1.1.2 mrg goto dl_fail;
5114 1.1.1.10 mrg #endif
5115 1.1 mrg
5116 1.1 mrg /* Check if all required functions are available in the plugin and store
5117 1.1.1.2 mrg    their handles.  None of the symbols can legitimately be NULL,
5118 1.1.1.2 mrg so we don't need to check dlerror all the time. */
5119 1.1 mrg #define DLSYM(f) \
5120 1.1.1.2 mrg if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5121 1.1.1.2 mrg goto dl_fail
5122 1.1.1.2 mrg /* Similar, but missing functions are not an error. Return false if
5123 1.1.1.2 mrg failed, true otherwise. */
5124 1.1.1.2 mrg #define DLSYM_OPT(f, n) \
5125 1.1.1.2 mrg ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5126 1.1.1.2 mrg || (last_missing = #n, 0))
5127 1.1.1.2 mrg
5128 1.1.1.2 mrg DLSYM (version);
5129 1.1.1.2 mrg if (device->version_func () != GOMP_VERSION)
5130 1.1.1.2 mrg {
5131 1.1.1.2 mrg err = "plugin version mismatch";
5132 1.1.1.2 mrg goto fail;
5133 1.1.1.2 mrg }
5134 1.1 mrg
5135 1.1 mrg DLSYM (get_name);
5136 1.1 mrg DLSYM (get_caps);
5137 1.1 mrg DLSYM (get_type);
5138 1.1 mrg DLSYM (get_num_devices);
5139 1.1 mrg DLSYM (init_device);
5140 1.1 mrg DLSYM (fini_device);
5141 1.1 mrg DLSYM (load_image);
5142 1.1 mrg DLSYM (unload_image);
5143 1.1 mrg DLSYM (alloc);
5144 1.1 mrg DLSYM (free);
5145 1.1 mrg DLSYM (dev2host);
5146 1.1 mrg DLSYM (host2dev);
5147 1.1.1.11 mrg DLSYM_OPT (memcpy2d, memcpy2d);
5148 1.1.1.11 mrg DLSYM_OPT (memcpy3d, memcpy3d);
5149 1.1 mrg device->capabilities = device->get_caps_func ();
5150 1.1 mrg if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5151 1.1.1.2 mrg {
5152 1.1.1.2 mrg DLSYM (run);
5153 1.1.1.8 mrg DLSYM_OPT (async_run, async_run);
5154 1.1.1.2 mrg DLSYM_OPT (can_run, can_run);
5155 1.1.1.2 mrg DLSYM (dev2dev);
5156 1.1.1.2 mrg }
5157 1.1 mrg if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5158 1.1 mrg {
5159 1.1.1.3 mrg if (!DLSYM_OPT (openacc.exec, openacc_exec)
5160 1.1.1.2 mrg || !DLSYM_OPT (openacc.create_thread_data,
5161 1.1.1.2 mrg openacc_create_thread_data)
5162 1.1.1.2 mrg || !DLSYM_OPT (openacc.destroy_thread_data,
5163 1.1.1.8 mrg openacc_destroy_thread_data)
5164 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5165 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5166 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5167 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5168 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5169 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.queue_callback,
5170 1.1.1.8 mrg openacc_async_queue_callback)
5171 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5172 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5173 1.1.1.8 mrg || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5174 1.1.1.8 mrg || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5175 1.1 mrg {
5176 1.1.1.2 mrg /* Require all the OpenACC handlers if we have
5177 1.1.1.2 mrg GOMP_OFFLOAD_CAP_OPENACC_200. */
5178 1.1 mrg err = "plugin missing OpenACC handler function";
5179 1.1.1.2 mrg goto fail;
5180 1.1 mrg }
5181 1.1.1.2 mrg
5182 1.1.1.2 mrg unsigned cuda = 0;
5183 1.1.1.2 mrg cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5184 1.1.1.3 mrg openacc_cuda_get_current_device);
5185 1.1.1.2 mrg cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5186 1.1.1.3 mrg openacc_cuda_get_current_context);
5187 1.1.1.3 mrg cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5188 1.1.1.3 mrg cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5189 1.1.1.2 mrg if (cuda && cuda != 4)
5190 1.1 mrg {
5191 1.1.1.2 mrg /* Make sure all the CUDA functions are there if any of them are. */
5192 1.1 mrg err = "plugin missing OpenACC CUDA handler function";
5193 1.1.1.2 mrg goto fail;
5194 1.1 mrg }
5195 1.1 mrg }
5196 1.1 mrg #undef DLSYM
5197 1.1 mrg #undef DLSYM_OPT
5198 1.1 mrg
5199 1.1.1.2 mrg return 1;
5200 1.1.1.2 mrg
5201 1.1.1.2 mrg dl_fail:
5202 1.1.1.2 mrg err = dlerror ();
5203 1.1.1.2 mrg fail:
5204 1.1.1.2 mrg gomp_error ("while loading %s: %s", plugin_name, err);
5205 1.1.1.2 mrg if (last_missing)
5206 1.1.1.2 mrg gomp_error ("missing function was %s", last_missing);
5207 1.1.1.2 mrg if (plugin_handle)
5208 1.1.1.2 mrg dlclose (plugin_handle);
5209 1.1.1.2 mrg
5210 1.1.1.2 mrg return 0;
5211 1.1.1.2 mrg }
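
/* Illustrative sketch (not part of libgomp): per the lookups above, a
   minimal plugin shared object must export at least the mandatory
   GOMP_OFFLOAD_* entry points, e.g.

     unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; }
     const char *GOMP_OFFLOAD_get_name (void) { return "example"; }

   together with get_caps, get_type, get_num_devices, init_device,
   fini_device, load_image, unload_image, alloc, free, dev2host and
   host2dev; run and dev2dev additionally become mandatory when get_caps
   advertises GOMP_OFFLOAD_CAP_OPENMP_400, and the openacc_* handlers when
   it advertises GOMP_OFFLOAD_CAP_OPENACC_200.  The authoritative
   prototypes are the ones declared in libgomp-plugin.h; the two shown
   here are assumptions for illustration only.  */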
5212 1.1.1.2 mrg
5213 1.1.1.2 mrg /* This function finalizes all initialized devices. */
5214 1.1.1.2 mrg
5215 1.1.1.2 mrg static void
5216 1.1.1.2 mrg gomp_target_fini (void)
5217 1.1.1.2 mrg {
5218 1.1.1.2 mrg int i;
5219 1.1.1.2 mrg for (i = 0; i < num_devices; i++)
5220 1.1 mrg {
5221 1.1.1.3 mrg bool ret = true;
5222 1.1.1.2 mrg struct gomp_device_descr *devicep = &devices[i];
5223 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
5224 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
5225 1.1.1.8 mrg ret = gomp_fini_device (devicep);
5226 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
5227 1.1.1.3 mrg if (!ret)
5228 1.1.1.3 mrg gomp_fatal ("device finalization failed");
5229 1.1 mrg }
5230 1.1 mrg }
5231 1.1 mrg
5232 1.1.1.7 mrg /* This function initializes the runtime for offloading.
5233 1.1.1.7 mrg    It parses the list of offload plugins and tries to load them.
5234 1.1.1.7 mrg On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5235 1.1 mrg will be set, and the array DEVICES initialized, containing descriptors for
5236 1.1 mrg    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
5237 1.1 mrg    by the others. */
5238 1.1 mrg
5239 1.1 mrg static void
5240 1.1 mrg gomp_target_init (void)
5241 1.1 mrg {
5242 1.1 mrg   const char *prefix = "libgomp-plugin-";
5243 1.1 mrg const char *suffix = SONAME_SUFFIX (1);
5244 1.1 mrg const char *cur, *next;
5245 1.1 mrg char *plugin_name;
5246 1.1.1.10 mrg int i, new_num_devs;
5247 1.1.1.10 mrg int num_devs = 0, num_devs_openmp;
5248 1.1.1.10 mrg struct gomp_device_descr *devs = NULL;
5249 1.1 mrg
5250 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5251 1.1.1.10 mrg return;
5252 1.1 mrg
5253 1.1.1.7 mrg cur = OFFLOAD_PLUGINS;
5254 1.1 mrg if (*cur)
5255 1.1 mrg do
5256 1.1 mrg {
5257 1.1 mrg struct gomp_device_descr current_device;
5258 1.1.1.6 mrg size_t prefix_len, suffix_len, cur_len;
5259 1.1 mrg
5260 1.1 mrg next = strchr (cur, ',');
5261 1.1 mrg
5262 1.1.1.6 mrg prefix_len = strlen (prefix);
5263 1.1.1.6 mrg cur_len = next ? next - cur : strlen (cur);
5264 1.1.1.6 mrg suffix_len = strlen (suffix);
5265 1.1.1.6 mrg
5266 1.1.1.6 mrg plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5267 1.1 mrg if (!plugin_name)
5268 1.1 mrg {
5269 1.1.1.10 mrg num_devs = 0;
5270 1.1 mrg break;
5271 1.1 mrg }
5272 1.1 mrg
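      /* E.g. for a plugin list entry "nvptx" the memcpy calls below build
	 "libgomp-plugin-nvptx" SONAME_SUFFIX (1), typically
	 "libgomp-plugin-nvptx.so.1" on ELF targets; the exact suffix is
	 configuration-dependent (see plugin-suffix.h).  */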
5273 1.1.1.6 mrg memcpy (plugin_name, prefix, prefix_len);
5274 1.1.1.6 mrg memcpy (plugin_name + prefix_len, cur, cur_len);
5275 1.1.1.6 mrg memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5276 1.1 mrg
5277 1.1 mrg 	  if (gomp_load_plugin_for_device (&current_device, plugin_name))
5278 1.1 mrg {
5279 1.1.1.11 mrg int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5280 1.1.1.11 mrg new_num_devs = current_device.get_num_devices_func (omp_req);
5281 1.1.1.11 mrg if (gomp_debug_var > 0 && new_num_devs < 0)
5282 1.1.1.11 mrg {
5283 1.1.1.11 mrg bool found = false;
5284 1.1.1.11 mrg int type = current_device.get_type_func ();
5285 1.1.1.11 mrg for (int img = 0; img < num_offload_images; img++)
5286 1.1.1.11 mrg if (type == offload_images[img].type)
5287 1.1.1.11 mrg found = true;
5288 1.1.1.11 mrg if (found)
5289 1.1.1.11 mrg {
5290 1.1.1.11 mrg char buf[sizeof ("unified_address, unified_shared_memory, "
5291 1.1.1.11 mrg "reverse_offload")];
5292 1.1.1.11 mrg gomp_requires_to_name (buf, sizeof (buf), omp_req);
5293 1.1.1.11 mrg char *name = (char *) malloc (cur_len + 1);
5294 1.1.1.11 mrg memcpy (name, cur, cur_len);
5295 1.1.1.11 mrg name[cur_len] = '\0';
5296 1.1.1.11 mrg gomp_debug (1,
5297 1.1.1.11 mrg "%s devices present but 'omp requires %s' "
5298 1.1.1.11 mrg "cannot be fulfilled\n", name, buf);
5299 1.1.1.11 mrg free (name);
5300 1.1.1.11 mrg }
5301 1.1.1.11 mrg }
5302 1.1.1.11 mrg else if (new_num_devs >= 1)
5303 1.1 mrg {
5304 1.1 mrg /* Augment DEVICES and NUM_DEVICES. */
5305 1.1 mrg
5306 1.1.1.10 mrg devs = realloc (devs, (num_devs + new_num_devs)
5307 1.1.1.10 mrg * sizeof (struct gomp_device_descr));
5308 1.1.1.10 mrg if (!devs)
5309 1.1 mrg {
5310 1.1.1.10 mrg num_devs = 0;
5311 1.1 mrg free (plugin_name);
5312 1.1 mrg break;
5313 1.1 mrg }
5314 1.1 mrg
5315 1.1 mrg current_device.name = current_device.get_name_func ();
5316 1.1 mrg /* current_device.capabilities has already been set. */
5317 1.1 mrg current_device.type = current_device.get_type_func ();
5318 1.1 mrg current_device.mem_map.root = NULL;
5319 1.1.1.11 mrg current_device.mem_map_rev.root = NULL;
5320 1.1.1.2 mrg current_device.state = GOMP_DEVICE_UNINITIALIZED;
5321 1.1.1.10 mrg for (i = 0; i < new_num_devs; i++)
5322 1.1 mrg {
5323 1.1 mrg current_device.target_id = i;
5324 1.1.1.10 mrg devs[num_devs] = current_device;
5325 1.1.1.10 mrg gomp_mutex_init (&devs[num_devs].lock);
5326 1.1.1.10 mrg num_devs++;
5327 1.1 mrg }
5328 1.1 mrg }
5329 1.1 mrg }
5330 1.1 mrg
5331 1.1 mrg free (plugin_name);
5332 1.1 mrg cur = next + 1;
5333 1.1 mrg }
5334 1.1 mrg while (next);
5335 1.1 mrg
5336 1.1 mrg /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5337 1.1 mrg NUM_DEVICES_OPENMP. */
5338 1.1.1.10 mrg struct gomp_device_descr *devs_s
5339 1.1.1.10 mrg = malloc (num_devs * sizeof (struct gomp_device_descr));
5340 1.1.1.10 mrg if (!devs_s)
5341 1.1.1.10 mrg {
5342 1.1.1.10 mrg num_devs = 0;
5343 1.1.1.10 mrg free (devs);
5344 1.1.1.10 mrg devs = NULL;
5345 1.1.1.10 mrg }
5346 1.1.1.10 mrg num_devs_openmp = 0;
5347 1.1.1.10 mrg for (i = 0; i < num_devs; i++)
5348 1.1.1.10 mrg if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5349 1.1.1.10 mrg devs_s[num_devs_openmp++] = devs[i];
5350 1.1.1.10 mrg int num_devs_after_openmp = num_devs_openmp;
5351 1.1.1.10 mrg for (i = 0; i < num_devs; i++)
5352 1.1.1.10 mrg if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5353 1.1.1.10 mrg devs_s[num_devs_after_openmp++] = devs[i];
5354 1.1.1.10 mrg free (devs);
5355 1.1.1.10 mrg devs = devs_s;
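  /* Now devs[0] .. devs[num_devs_openmp - 1] are exactly the
     GOMP_OFFLOAD_CAP_OPENMP_400 capable devices, so OpenMP device numbers
     index the front of the array.  */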
5356 1.1 mrg
5357 1.1.1.10 mrg for (i = 0; i < num_devs; i++)
5358 1.1 mrg {
5359 1.1 mrg       /* The 'devices' array may be relocated (by the realloc call) while we
5360 1.1 mrg 	 are still discovering plugins, so registering with the OpenACC runtime
5361 1.1 mrg 	 (which takes a copy of the pointer argument) must be delayed until now. */
5362 1.1.1.10 mrg if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5363 1.1.1.10 mrg goacc_register (&devs[i]);
5364 1.1 mrg }
5365 1.1.1.11 mrg if (gomp_global_icv.default_device_var == INT_MIN)
5366 1.1.1.11 mrg {
5367 1.1.1.11 mrg /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5368 1.1.1.11 mrg struct gomp_icv_list *none;
5369 1.1.1.11 mrg none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
5370 1.1.1.11 mrg gomp_global_icv.default_device_var = (num_devs_openmp
5371 1.1.1.11 mrg ? 0 : omp_invalid_device);
5372 1.1.1.11 mrg none->icvs.default_device_var = gomp_global_icv.default_device_var;
5373 1.1.1.11 mrg }
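  /* If offload is mandatory but no OpenMP-capable device was found, the
     default device becomes omp_invalid_device, so subsequent target
     constructs will report an error instead of silently running on the
     host.  */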
5374 1.1.1.2 mrg
5375 1.1.1.10 mrg num_devices = num_devs;
5376 1.1.1.10 mrg num_devices_openmp = num_devs_openmp;
5377 1.1.1.10 mrg devices = devs;
5378 1.1.1.2 mrg if (atexit (gomp_target_fini) != 0)
5379 1.1.1.2 mrg gomp_fatal ("atexit failed");
5380 1.1 mrg }
5381 1.1 mrg
5382 1.1 mrg #else /* PLUGIN_SUPPORT */
5383 1.1 mrg /* If dlfcn.h is unavailable we always fall back to host execution.
5384 1.1 mrg    The GOMP_target* routines are just stubs for this case. */
5385 1.1 mrg static void
5386 1.1 mrg gomp_target_init (void)
5387 1.1 mrg {
5388 1.1 mrg }
5389 1.1 mrg #endif /* PLUGIN_SUPPORT */
5390