target.c revision 1.1.1.10 1 1.1.1.10 mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 1.1 mrg Contributed by Jakub Jelinek <jakub (at) redhat.com>.
3 1.1 mrg
4 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.1 mrg (libgomp).
6 1.1 mrg
7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1 mrg more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg /* This file contains the support of offloading. */
27 1.1 mrg
28 1.1 mrg #include "libgomp.h"
29 1.1 mrg #include "oacc-plugin.h"
30 1.1 mrg #include "oacc-int.h"
31 1.1 mrg #include "gomp-constants.h"
32 1.1 mrg #include <limits.h>
33 1.1 mrg #include <stdbool.h>
34 1.1 mrg #include <stdlib.h>
35 1.1 mrg #ifdef HAVE_INTTYPES_H
36 1.1 mrg # include <inttypes.h> /* For PRIu64. */
37 1.1 mrg #endif
38 1.1 mrg #include <string.h>
39 1.1 mrg #include <assert.h>
40 1.1.1.2 mrg #include <errno.h>
41 1.1 mrg
42 1.1 mrg #ifdef PLUGIN_SUPPORT
43 1.1 mrg #include <dlfcn.h>
44 1.1 mrg #include "plugin-suffix.h"
45 1.1 mrg #endif
46 1.1 mrg
/* Hash-table glue for "hashtab.h": entries are pointers to uintptr_t
   (used below for refcount fields), hashed and compared by address.
   htab_alloc/htab_free must be defined before the include; htab_hash
   and htab_eq are used by the instantiated table operations.  */
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

/* Hash an entry by its pointer value.  */
static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

/* Two entries are equal only if they are the same pointer.  */
static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}
63 1.1.1.10 mrg
64 1.1.1.8 mrg #define FIELD_TGT_EMPTY (~(size_t) 0)
65 1.1.1.8 mrg
66 1.1 mrg static void gomp_target_init (void);
67 1.1 mrg
/* The whole initialization code for offloading plugins is only run once.  */
69 1.1 mrg static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
70 1.1 mrg
71 1.1 mrg /* Mutex for offload image registration. */
72 1.1 mrg static gomp_mutex_t register_lock;
73 1.1 mrg
74 1.1 mrg /* This structure describes an offload image.
75 1.1 mrg It contains type of the target device, pointer to host table descriptor, and
76 1.1 mrg pointer to target data. */
77 1.1 mrg struct offload_image_descr {
78 1.1.1.2 mrg unsigned version;
79 1.1 mrg enum offload_target_type type;
80 1.1.1.2 mrg const void *host_table;
81 1.1.1.2 mrg const void *target_data;
82 1.1 mrg };
83 1.1 mrg
84 1.1 mrg /* Array of descriptors of offload images. */
85 1.1 mrg static struct offload_image_descr *offload_images;
86 1.1 mrg
87 1.1 mrg /* Total number of offload images. */
88 1.1 mrg static int num_offload_images;
89 1.1 mrg
90 1.1 mrg /* Array of descriptors for all available devices. */
91 1.1 mrg static struct gomp_device_descr *devices;
92 1.1 mrg
93 1.1 mrg /* Total number of available devices. */
94 1.1 mrg static int num_devices;
95 1.1 mrg
96 1.1 mrg /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
97 1.1 mrg static int num_devices_openmp;
98 1.1 mrg
99 1.1 mrg /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
100 1.1 mrg
101 1.1 mrg static void *
102 1.1 mrg gomp_realloc_unlock (void *old, size_t size)
103 1.1 mrg {
104 1.1 mrg void *ret = realloc (old, size);
105 1.1 mrg if (ret == NULL)
106 1.1 mrg {
107 1.1 mrg gomp_mutex_unlock (®ister_lock);
108 1.1 mrg gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
109 1.1 mrg }
110 1.1 mrg return ret;
111 1.1 mrg }
112 1.1 mrg
/* Run gomp_target_init exactly once, however many threads race into
   offloading initialization (pthread_once guarantees single execution).  */

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
118 1.1 mrg
/* Return the number of devices usable from OpenMP, i.e. those counted
   in num_devices_openmp (GOMP_OFFLOAD_CAP_OPENMP_400 capable), making
   sure the plugin infrastructure has been initialized first.  */

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
125 1.1 mrg
/* Translate DEVICE_ID (possibly the GOMP_DEVICE_ICV sentinel) into a
   device descriptor, lazily initializing the device.  Returns NULL to
   request host fallback (id out of range, or device already finalized);
   under OMP_TARGET_OFFLOAD=mandatory most such cases are fatal instead.  */

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      /* Sentinel: use the default-device-var ICV of the current task.  */
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      /* Explicit host fallback (GOMP_DEVICE_HOST_FALLBACK) and
	 device_id == num_devices_openmp are tolerated even under
	 mandatory offload; anything else is a fatal error there.  */
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != GOMP_DEVICE_HOST_FALLBACK
	  && device_id != num_devices_openmp)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  /* Lazy per-device initialization, guarded by the device lock.  A
     finalized device can no longer be used.  */
  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
163 1.1 mrg
164 1.1 mrg
/* Look KEY up in MEM_MAP.  A zero-length key ([host_start, host_start))
   would never match an interval search, so additionally probe with the
   key temporarily widened one byte forward, then one byte backward,
   then retry the exact key.  KEY is restored before each return.  */

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* Probe one byte past the address.  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* Probe one byte before the address.  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
183 1.1.1.2 mrg
184 1.1.1.2 mrg static inline splay_tree_key
185 1.1.1.2 mrg gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
186 1.1.1.2 mrg {
187 1.1.1.2 mrg if (key->host_start != key->host_end)
188 1.1.1.2 mrg return splay_tree_lookup (mem_map, key);
189 1.1.1.2 mrg
190 1.1.1.2 mrg key->host_end++;
191 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, key);
192 1.1.1.2 mrg key->host_end--;
193 1.1.1.2 mrg return n;
194 1.1.1.2 mrg }
195 1.1.1.2 mrg
/* Copy SIZE bytes between host and device through the plugin callback
   COPY_FUNC.  DST and SRC are human-readable tags ("dev"/"host") used
   only in the error message.  On failure, release the device lock
   (which the caller holds) and abort via gomp_fatal.  */

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
210 1.1.1.3 mrg
/* Like gomp_device_copy, but enqueue the transfer on OpenACC
   asyncqueue AQ.  SRCADDR_ORIG, when non-NULL and different from
   SRCADDR, is the original host address whose contents were staged
   into a temporary buffer at SRCADDR; it is reported in the error
   message to aid debugging.  On failure the device lock is released
   and gomp_fatal is called.  */

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}
237 1.1.1.8 mrg
238 1.1.1.6 mrg /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
239 1.1.1.6 mrg host to device memory transfers. */
240 1.1.1.6 mrg
struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory,
     as offsets relative to the target block's tgt_start.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* Target memory descriptor whose tgt_start the chunk offsets are
     relative to.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
265 1.1.1.6 mrg
266 1.1.1.6 mrg /* Maximum size of memory region considered for coalescing. Larger copies
267 1.1.1.6 mrg are performed directly. */
268 1.1.1.6 mrg #define MAX_COALESCE_BUF_SIZE (32 * 1024)
269 1.1.1.6 mrg
270 1.1.1.6 mrg /* Maximum size of a gap in between regions to consider them being copied
271 1.1.1.6 mrg within the same chunk. All the device offsets considered are within
272 1.1.1.6 mrg newly allocated device memory, so it isn't fatal if we copy some padding
273 1.1.1.6 mrg in between from host to device. The gaps come either from alignment
274 1.1.1.6 mrg padding or from memory regions which are not supposed to be copied from
275 1.1.1.6 mrg host to device (e.g. map(alloc:), map(from:) etc.). */
276 1.1.1.6 mrg #define MAX_COALESCE_BUF_GAP (4 * 1024)
277 1.1.1.6 mrg
278 1.1.1.10 mrg /* Add region with device tgt_start relative offset and length to CBUF.
279 1.1.1.10 mrg
280 1.1.1.10 mrg This must not be used for asynchronous copies, because the host data might
281 1.1.1.10 mrg not be computed yet (by an earlier asynchronous compute region, for
282 1.1.1.10 mrg example).
283 1.1.1.10 mrg TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
284 1.1.1.10 mrg is it more performant to use libgomp CBUF buffering or individual device
285 1.1.1.10 mrg asyncronous copying?) */
286 1.1.1.6 mrg
287 1.1.1.6 mrg static inline void
288 1.1.1.6 mrg gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
289 1.1.1.6 mrg {
290 1.1.1.6 mrg if (len > MAX_COALESCE_BUF_SIZE || len == 0)
291 1.1.1.6 mrg return;
292 1.1.1.6 mrg if (cbuf->chunk_cnt)
293 1.1.1.6 mrg {
294 1.1.1.6 mrg if (cbuf->chunk_cnt < 0)
295 1.1.1.6 mrg return;
296 1.1.1.7 mrg if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
297 1.1.1.6 mrg {
298 1.1.1.6 mrg cbuf->chunk_cnt = -1;
299 1.1.1.6 mrg return;
300 1.1.1.6 mrg }
301 1.1.1.7 mrg if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
302 1.1.1.6 mrg {
303 1.1.1.7 mrg cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
304 1.1.1.6 mrg cbuf->use_cnt++;
305 1.1.1.6 mrg return;
306 1.1.1.6 mrg }
307 1.1.1.6 mrg /* If the last chunk is only used by one mapping, discard it,
308 1.1.1.6 mrg as it will be one host to device copy anyway and
309 1.1.1.6 mrg memcpying it around will only waste cycles. */
310 1.1.1.6 mrg if (cbuf->use_cnt == 1)
311 1.1.1.6 mrg cbuf->chunk_cnt--;
312 1.1.1.6 mrg }
313 1.1.1.7 mrg cbuf->chunks[cbuf->chunk_cnt].start = start;
314 1.1.1.7 mrg cbuf->chunks[cbuf->chunk_cnt].end = start + len;
315 1.1.1.6 mrg cbuf->chunk_cnt++;
316 1.1.1.6 mrg cbuf->use_cnt = 1;
317 1.1.1.6 mrg }
318 1.1.1.6 mrg
319 1.1.1.6 mrg /* Return true for mapping kinds which need to copy data from the
320 1.1.1.6 mrg host to device for regions that weren't previously mapped. */
321 1.1.1.6 mrg
322 1.1.1.6 mrg static inline bool
323 1.1.1.6 mrg gomp_to_device_kind_p (int kind)
324 1.1.1.6 mrg {
325 1.1.1.6 mrg switch (kind)
326 1.1.1.6 mrg {
327 1.1.1.6 mrg case GOMP_MAP_ALLOC:
328 1.1.1.6 mrg case GOMP_MAP_FROM:
329 1.1.1.6 mrg case GOMP_MAP_FORCE_ALLOC:
330 1.1.1.8 mrg case GOMP_MAP_FORCE_FROM:
331 1.1.1.6 mrg case GOMP_MAP_ALWAYS_FROM:
332 1.1.1.6 mrg return false;
333 1.1.1.6 mrg default:
334 1.1.1.6 mrg return true;
335 1.1.1.6 mrg }
336 1.1.1.6 mrg }
337 1.1.1.6 mrg
/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  CBUF, when non-NULL, coalesces small synchronous copies into a
   staging buffer; it must not be combined with AQ.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add': CBUF is for synchronous copies only.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  Make a
	     copy of the data into a temporary buffer in those cases.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free temporary buffer once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      /* If D lies within one of the coalesced chunks, just memcpy into
	 the staging buffer; the whole chunk reaches the device later in
	 a single transfer.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* A single region must never straddle a chunk end.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  /* Not coalesced: one direct synchronous host-to-device transfer.  */
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
404 1.1.1.3 mrg
405 1.1.1.8 mrg attribute_hidden void
406 1.1.1.3 mrg gomp_copy_dev2host (struct gomp_device_descr *devicep,
407 1.1.1.8 mrg struct goacc_asyncqueue *aq,
408 1.1.1.3 mrg void *h, const void *d, size_t sz)
409 1.1.1.3 mrg {
410 1.1.1.8 mrg if (__builtin_expect (aq != NULL, 0))
411 1.1.1.8 mrg goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
412 1.1.1.10 mrg "host", h, "dev", d, NULL, sz, aq);
413 1.1.1.8 mrg else
414 1.1.1.8 mrg gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
415 1.1.1.3 mrg }
416 1.1.1.3 mrg
417 1.1.1.3 mrg static void
418 1.1.1.3 mrg gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
419 1.1.1.3 mrg {
420 1.1.1.3 mrg if (!devicep->free_func (devicep->target_id, devptr))
421 1.1.1.3 mrg {
422 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
423 1.1.1.3 mrg gomp_fatal ("error in freeing device memory block at %p", devptr);
424 1.1.1.3 mrg }
425 1.1.1.3 mrg }
426 1.1.1.3 mrg
427 1.1.1.10 mrg /* Increment reference count of a splay_tree_key region K by 1.
428 1.1.1.10 mrg If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
429 1.1.1.10 mrg increment the value if refcount is not yet contained in the set (used for
430 1.1.1.10 mrg OpenMP 5.0, which specifies that a region's refcount is adjusted at most
431 1.1.1.10 mrg once for each construct). */
432 1.1.1.10 mrg
433 1.1.1.10 mrg static inline void
434 1.1.1.10 mrg gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
435 1.1.1.10 mrg {
436 1.1.1.10 mrg if (k == NULL || k->refcount == REFCOUNT_INFINITY)
437 1.1.1.10 mrg return;
438 1.1.1.10 mrg
439 1.1.1.10 mrg uintptr_t *refcount_ptr = &k->refcount;
440 1.1.1.10 mrg
441 1.1.1.10 mrg if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
442 1.1.1.10 mrg refcount_ptr = &k->structelem_refcount;
443 1.1.1.10 mrg else if (REFCOUNT_STRUCTELEM_P (k->refcount))
444 1.1.1.10 mrg refcount_ptr = k->structelem_refcount_ptr;
445 1.1.1.10 mrg
446 1.1.1.10 mrg if (refcount_set)
447 1.1.1.10 mrg {
448 1.1.1.10 mrg if (htab_find (*refcount_set, refcount_ptr))
449 1.1.1.10 mrg return;
450 1.1.1.10 mrg uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
451 1.1.1.10 mrg *slot = refcount_ptr;
452 1.1.1.10 mrg }
453 1.1.1.10 mrg
454 1.1.1.10 mrg *refcount_ptr += 1;
455 1.1.1.10 mrg return;
456 1.1.1.10 mrg }
457 1.1.1.10 mrg
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      /* Missing keys and permanent mappings: nothing to do.  */
      *do_copy = *do_remove = false;
      return;
    }

  /* Structure elements share one counter; select the counter to adjust.  */
  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  /* Remember the pre-adjustment value to distinguish "we just zeroed it"
     from "it was already zero".  */
  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  /* Already adjusted on this construct; skip the decrement but
	     still compute the out-values below.  */
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
533 1.1.1.10 mrg
534 1.1.1.2 mrg /* Handle the case where gomp_map_lookup, splay_tree_lookup or
535 1.1.1.2 mrg gomp_map_0len_lookup found oldn for newn.
536 1.1 mrg Helper function of gomp_map_vars. */
537 1.1 mrg
538 1.1 mrg static inline void
539 1.1.1.8 mrg gomp_map_vars_existing (struct gomp_device_descr *devicep,
540 1.1.1.8 mrg struct goacc_asyncqueue *aq, splay_tree_key oldn,
541 1.1.1.2 mrg splay_tree_key newn, struct target_var_desc *tgt_var,
542 1.1.1.10 mrg unsigned char kind, bool always_to_flag, bool implicit,
543 1.1.1.10 mrg struct gomp_coalesce_buf *cbuf,
544 1.1.1.10 mrg htab_t *refcount_set)
545 1.1 mrg {
546 1.1.1.10 mrg assert (kind != GOMP_MAP_ATTACH
547 1.1.1.10 mrg || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
548 1.1.1.8 mrg
549 1.1.1.2 mrg tgt_var->key = oldn;
550 1.1.1.2 mrg tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
551 1.1.1.2 mrg tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
552 1.1.1.8 mrg tgt_var->is_attach = false;
553 1.1.1.2 mrg tgt_var->offset = newn->host_start - oldn->host_start;
554 1.1.1.10 mrg
555 1.1.1.10 mrg /* For implicit maps, old contained in new is valid. */
556 1.1.1.10 mrg bool implicit_subset = (implicit
557 1.1.1.10 mrg && newn->host_start <= oldn->host_start
558 1.1.1.10 mrg && oldn->host_end <= newn->host_end);
559 1.1.1.10 mrg if (implicit_subset)
560 1.1.1.10 mrg tgt_var->length = oldn->host_end - oldn->host_start;
561 1.1.1.10 mrg else
562 1.1.1.10 mrg tgt_var->length = newn->host_end - newn->host_start;
563 1.1.1.2 mrg
564 1.1 mrg if ((kind & GOMP_MAP_FLAG_FORCE)
565 1.1.1.10 mrg /* For implicit maps, old contained in new is valid. */
566 1.1.1.10 mrg || !(implicit_subset
567 1.1.1.10 mrg /* Otherwise, new contained inside old is considered valid. */
568 1.1.1.10 mrg || (oldn->host_start <= newn->host_start
569 1.1.1.10 mrg && newn->host_end <= oldn->host_end)))
570 1.1 mrg {
571 1.1 mrg gomp_mutex_unlock (&devicep->lock);
572 1.1 mrg gomp_fatal ("Trying to map into device [%p..%p) object when "
573 1.1 mrg "[%p..%p) is already mapped",
574 1.1 mrg (void *) newn->host_start, (void *) newn->host_end,
575 1.1 mrg (void *) oldn->host_start, (void *) oldn->host_end);
576 1.1 mrg }
577 1.1.1.2 mrg
578 1.1.1.10 mrg if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
579 1.1.1.10 mrg {
580 1.1.1.10 mrg /* Implicit + always should not happen. If this does occur, below
581 1.1.1.10 mrg address/length adjustment is a TODO. */
582 1.1.1.10 mrg assert (!implicit_subset);
583 1.1.1.10 mrg
584 1.1.1.10 mrg if (oldn->aux && oldn->aux->attach_count)
585 1.1.1.10 mrg {
586 1.1.1.10 mrg /* We have to be careful not to overwrite still attached pointers
587 1.1.1.10 mrg during the copyback to host. */
588 1.1.1.10 mrg uintptr_t addr = newn->host_start;
589 1.1.1.10 mrg while (addr < newn->host_end)
590 1.1.1.10 mrg {
591 1.1.1.10 mrg size_t i = (addr - oldn->host_start) / sizeof (void *);
592 1.1.1.10 mrg if (oldn->aux->attach_count[i] == 0)
593 1.1.1.10 mrg gomp_copy_host2dev (devicep, aq,
594 1.1.1.10 mrg (void *) (oldn->tgt->tgt_start
595 1.1.1.10 mrg + oldn->tgt_offset
596 1.1.1.10 mrg + addr - oldn->host_start),
597 1.1.1.10 mrg (void *) addr,
598 1.1.1.10 mrg sizeof (void *), false, cbuf);
599 1.1.1.10 mrg addr += sizeof (void *);
600 1.1.1.10 mrg }
601 1.1.1.10 mrg }
602 1.1.1.10 mrg else
603 1.1.1.10 mrg gomp_copy_host2dev (devicep, aq,
604 1.1.1.10 mrg (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
605 1.1.1.10 mrg + newn->host_start - oldn->host_start),
606 1.1.1.10 mrg (void *) newn->host_start,
607 1.1.1.10 mrg newn->host_end - newn->host_start, false, cbuf);
608 1.1.1.10 mrg }
609 1.1.1.3 mrg
610 1.1.1.10 mrg gomp_increment_refcount (oldn, refcount_set);
611 1.1 mrg }
612 1.1 mrg
613 1.1 mrg static int
614 1.1.1.2 mrg get_kind (bool short_mapkind, void *kinds, int idx)
615 1.1.1.2 mrg {
616 1.1.1.10 mrg if (!short_mapkind)
617 1.1.1.10 mrg return ((unsigned char *) kinds)[idx];
618 1.1.1.10 mrg
619 1.1.1.10 mrg int val = ((unsigned short *) kinds)[idx];
620 1.1.1.10 mrg if (GOMP_MAP_IMPLICIT_P (val))
621 1.1.1.10 mrg val &= ~GOMP_MAP_IMPLICIT;
622 1.1.1.10 mrg return val;
623 1.1.1.10 mrg }
624 1.1.1.10 mrg
625 1.1.1.10 mrg
/* Return whether the mapping at index IDX in KINDS carries the implicit
   flag.  The flag only exists in the short (16-bit) encoding; the
   unsigned-char encoding never marks implicit maps.  */

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  return GOMP_MAP_IMPLICIT_P (((unsigned short *) kinds)[idx]);
}
635 1.1.1.2 mrg
/* Write the device-translated value of host pointer HOST_PTR (with BIAS
   applied) into device location TGT->tgt_start + TARGET_OFFSET.  A NULL
   host pointer stores NULL.  If the biased address is not mapped, store
   0 when ALLOW_ZERO_LENGTH_ARRAY_SECTIONS, otherwise abort.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* Null host pointer: store a null device pointer.  The source is
	 a stack local, hence EPHEMERAL is true for the async case.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = 0;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}
684 1.1.1.2 mrg
/* Map structure element I (HOSTADDRS[I] / SIZES[I]): it must already be
   covered by the same target block N that maps element FIRST of the
   enclosing structure, at the same host/device offset.  On success the
   element is recorded in TGT->list[i] via gomp_map_vars_existing;
   otherwise a fatal error is raised.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  /* Structure-element lists always use the short (16-bit) kind encoding.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-sized element: also probe one byte before (while still past
	 the structure start) and one byte after the address.  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
751 1.1.1.2 mrg
752 1.1.1.8 mrg attribute_hidden void
753 1.1.1.8 mrg gomp_attach_pointer (struct gomp_device_descr *devicep,
754 1.1.1.8 mrg struct goacc_asyncqueue *aq, splay_tree mem_map,
755 1.1.1.8 mrg splay_tree_key n, uintptr_t attach_to, size_t bias,
756 1.1.1.10 mrg struct gomp_coalesce_buf *cbufp,
757 1.1.1.10 mrg bool allow_zero_length_array_sections)
758 1.1.1.8 mrg {
759 1.1.1.8 mrg struct splay_tree_key_s s;
760 1.1.1.8 mrg size_t size, idx;
761 1.1.1.8 mrg
762 1.1.1.8 mrg if (n == NULL)
763 1.1.1.8 mrg {
764 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
765 1.1.1.8 mrg gomp_fatal ("enclosing struct not mapped for attach");
766 1.1.1.8 mrg }
767 1.1.1.8 mrg
768 1.1.1.8 mrg size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
769 1.1.1.8 mrg /* We might have a pointer in a packed struct: however we cannot have more
770 1.1.1.8 mrg than one such pointer in each pointer-sized portion of the struct, so
771 1.1.1.8 mrg this is safe. */
772 1.1.1.8 mrg idx = (attach_to - n->host_start) / sizeof (void *);
773 1.1.1.8 mrg
774 1.1.1.8 mrg if (!n->aux)
775 1.1.1.8 mrg n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
776 1.1.1.8 mrg
777 1.1.1.8 mrg if (!n->aux->attach_count)
778 1.1.1.8 mrg n->aux->attach_count
779 1.1.1.8 mrg = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
780 1.1.1.8 mrg
781 1.1.1.8 mrg if (n->aux->attach_count[idx] < UINTPTR_MAX)
782 1.1.1.8 mrg n->aux->attach_count[idx]++;
783 1.1.1.8 mrg else
784 1.1.1.8 mrg {
785 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
786 1.1.1.8 mrg gomp_fatal ("attach count overflow");
787 1.1.1.8 mrg }
788 1.1.1.8 mrg
789 1.1.1.8 mrg if (n->aux->attach_count[idx] == 1)
790 1.1.1.8 mrg {
791 1.1.1.8 mrg uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
792 1.1.1.8 mrg - n->host_start;
793 1.1.1.8 mrg uintptr_t target = (uintptr_t) *(void **) attach_to;
794 1.1.1.8 mrg splay_tree_key tn;
795 1.1.1.8 mrg uintptr_t data;
796 1.1.1.8 mrg
797 1.1.1.8 mrg if ((void *) target == NULL)
798 1.1.1.8 mrg {
799 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
800 1.1.1.8 mrg gomp_fatal ("attempt to attach null pointer");
801 1.1.1.8 mrg }
802 1.1.1.8 mrg
803 1.1.1.8 mrg s.host_start = target + bias;
804 1.1.1.8 mrg s.host_end = s.host_start + 1;
805 1.1.1.8 mrg tn = splay_tree_lookup (mem_map, &s);
806 1.1.1.8 mrg
807 1.1.1.8 mrg if (!tn)
808 1.1.1.8 mrg {
809 1.1.1.10 mrg if (allow_zero_length_array_sections)
810 1.1.1.10 mrg /* When allowing attachment to zero-length array sections, we
811 1.1.1.10 mrg allow attaching to NULL pointers when the target region is not
812 1.1.1.10 mrg mapped. */
813 1.1.1.10 mrg data = 0;
814 1.1.1.10 mrg else
815 1.1.1.10 mrg {
816 1.1.1.10 mrg gomp_mutex_unlock (&devicep->lock);
817 1.1.1.10 mrg gomp_fatal ("pointer target not mapped for attach");
818 1.1.1.10 mrg }
819 1.1.1.8 mrg }
820 1.1.1.10 mrg else
821 1.1.1.10 mrg data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
822 1.1.1.8 mrg
823 1.1.1.8 mrg gomp_debug (1,
824 1.1.1.8 mrg "%s: attaching host %p, target %p (struct base %p) to %p\n",
825 1.1.1.8 mrg __FUNCTION__, (void *) attach_to, (void *) devptr,
826 1.1.1.8 mrg (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
827 1.1.1.8 mrg
828 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
829 1.1.1.10 mrg sizeof (void *), true, cbufp);
830 1.1.1.8 mrg }
831 1.1.1.8 mrg else
832 1.1.1.8 mrg gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
833 1.1.1.8 mrg (void *) attach_to, (int) n->aux->attach_count[idx]);
834 1.1.1.8 mrg }
835 1.1.1.8 mrg
836 1.1.1.8 mrg attribute_hidden void
837 1.1.1.8 mrg gomp_detach_pointer (struct gomp_device_descr *devicep,
838 1.1.1.8 mrg struct goacc_asyncqueue *aq, splay_tree_key n,
839 1.1.1.8 mrg uintptr_t detach_from, bool finalize,
840 1.1.1.8 mrg struct gomp_coalesce_buf *cbufp)
841 1.1.1.8 mrg {
842 1.1.1.8 mrg size_t idx;
843 1.1.1.8 mrg
844 1.1.1.8 mrg if (n == NULL)
845 1.1.1.8 mrg {
846 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
847 1.1.1.8 mrg gomp_fatal ("enclosing struct not mapped for detach");
848 1.1.1.8 mrg }
849 1.1.1.8 mrg
850 1.1.1.8 mrg idx = (detach_from - n->host_start) / sizeof (void *);
851 1.1.1.8 mrg
852 1.1.1.8 mrg if (!n->aux || !n->aux->attach_count)
853 1.1.1.8 mrg {
854 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
855 1.1.1.8 mrg gomp_fatal ("no attachment counters for struct");
856 1.1.1.8 mrg }
857 1.1.1.8 mrg
858 1.1.1.8 mrg if (finalize)
859 1.1.1.8 mrg n->aux->attach_count[idx] = 1;
860 1.1.1.8 mrg
861 1.1.1.8 mrg if (n->aux->attach_count[idx] == 0)
862 1.1.1.8 mrg {
863 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
864 1.1.1.8 mrg gomp_fatal ("attach count underflow");
865 1.1.1.8 mrg }
866 1.1.1.8 mrg else
867 1.1.1.8 mrg n->aux->attach_count[idx]--;
868 1.1.1.8 mrg
869 1.1.1.8 mrg if (n->aux->attach_count[idx] == 0)
870 1.1.1.8 mrg {
871 1.1.1.8 mrg uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
872 1.1.1.8 mrg - n->host_start;
873 1.1.1.8 mrg uintptr_t target = (uintptr_t) *(void **) detach_from;
874 1.1.1.8 mrg
875 1.1.1.8 mrg gomp_debug (1,
876 1.1.1.8 mrg "%s: detaching host %p, target %p (struct base %p) to %p\n",
877 1.1.1.8 mrg __FUNCTION__, (void *) detach_from, (void *) devptr,
878 1.1.1.8 mrg (void *) (n->tgt->tgt_start + n->tgt_offset),
879 1.1.1.8 mrg (void *) target);
880 1.1.1.8 mrg
881 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
882 1.1.1.10 mrg sizeof (void *), true, cbufp);
883 1.1.1.8 mrg }
884 1.1.1.8 mrg else
885 1.1.1.8 mrg gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
886 1.1.1.8 mrg (void *) detach_from, (int) n->aux->attach_count[idx]);
887 1.1.1.8 mrg }
888 1.1.1.8 mrg
889 1.1.1.8 mrg attribute_hidden uintptr_t
890 1.1.1.2 mrg gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
891 1.1.1.2 mrg {
892 1.1.1.2 mrg if (tgt->list[i].key != NULL)
893 1.1.1.2 mrg return tgt->list[i].key->tgt->tgt_start
894 1.1.1.2 mrg + tgt->list[i].key->tgt_offset
895 1.1.1.2 mrg + tgt->list[i].offset;
896 1.1.1.8 mrg
897 1.1.1.8 mrg switch (tgt->list[i].offset)
898 1.1.1.8 mrg {
899 1.1.1.8 mrg case OFFSET_INLINED:
900 1.1.1.8 mrg return (uintptr_t) hostaddrs[i];
901 1.1.1.8 mrg
902 1.1.1.8 mrg case OFFSET_POINTER:
903 1.1.1.8 mrg return 0;
904 1.1.1.8 mrg
905 1.1.1.8 mrg case OFFSET_STRUCT:
906 1.1.1.8 mrg return tgt->list[i + 1].key->tgt->tgt_start
907 1.1.1.8 mrg + tgt->list[i + 1].key->tgt_offset
908 1.1.1.8 mrg + tgt->list[i + 1].offset
909 1.1.1.8 mrg + (uintptr_t) hostaddrs[i]
910 1.1.1.8 mrg - (uintptr_t) hostaddrs[i + 1];
911 1.1.1.8 mrg
912 1.1.1.8 mrg default:
913 1.1.1.8 mrg return tgt->tgt_start + tgt->list[i].offset;
914 1.1.1.8 mrg }
915 1.1 mrg }
916 1.1 mrg
917 1.1.1.8 mrg static inline __attribute__((always_inline)) struct target_mem_desc *
918 1.1.1.8 mrg gomp_map_vars_internal (struct gomp_device_descr *devicep,
919 1.1.1.8 mrg struct goacc_asyncqueue *aq, size_t mapnum,
920 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes,
921 1.1.1.8 mrg void *kinds, bool short_mapkind,
922 1.1.1.10 mrg htab_t *refcount_set,
923 1.1.1.8 mrg enum gomp_map_vars_kind pragma_kind)
924 1.1 mrg {
925 1.1 mrg size_t i, tgt_align, tgt_size, not_found_cnt = 0;
926 1.1.1.2 mrg bool has_firstprivate = false;
927 1.1.1.10 mrg bool has_always_ptrset = false;
928 1.1.1.10 mrg bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
929 1.1.1.2 mrg const int rshift = short_mapkind ? 8 : 3;
930 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
931 1.1 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
932 1.1 mrg struct splay_tree_key_s cur_node;
933 1.1 mrg struct target_mem_desc *tgt
934 1.1 mrg = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
935 1.1 mrg tgt->list_count = mapnum;
936 1.1.1.10 mrg tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
937 1.1 mrg tgt->device_descr = devicep;
938 1.1.1.8 mrg tgt->prev = NULL;
939 1.1.1.6 mrg struct gomp_coalesce_buf cbuf, *cbufp = NULL;
940 1.1 mrg
941 1.1 mrg if (mapnum == 0)
942 1.1.1.2 mrg {
943 1.1.1.2 mrg tgt->tgt_start = 0;
944 1.1.1.2 mrg tgt->tgt_end = 0;
945 1.1.1.2 mrg return tgt;
946 1.1.1.2 mrg }
947 1.1 mrg
948 1.1 mrg tgt_align = sizeof (void *);
949 1.1 mrg tgt_size = 0;
950 1.1.1.6 mrg cbuf.chunks = NULL;
951 1.1.1.6 mrg cbuf.chunk_cnt = -1;
952 1.1.1.6 mrg cbuf.use_cnt = 0;
953 1.1.1.6 mrg cbuf.buf = NULL;
954 1.1.1.6 mrg if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
955 1.1.1.6 mrg {
956 1.1.1.7 mrg size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
957 1.1.1.7 mrg cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
958 1.1.1.6 mrg cbuf.chunk_cnt = 0;
959 1.1.1.6 mrg }
960 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
961 1.1 mrg {
962 1.1 mrg size_t align = 4 * sizeof (void *);
963 1.1 mrg tgt_align = align;
964 1.1 mrg tgt_size = mapnum * sizeof (void *);
965 1.1.1.6 mrg cbuf.chunk_cnt = 1;
966 1.1.1.6 mrg cbuf.use_cnt = 1 + (mapnum > 1);
967 1.1.1.7 mrg cbuf.chunks[0].start = 0;
968 1.1.1.7 mrg cbuf.chunks[0].end = tgt_size;
969 1.1 mrg }
970 1.1 mrg
971 1.1 mrg gomp_mutex_lock (&devicep->lock);
972 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
973 1.1.1.2 mrg {
974 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
975 1.1.1.2 mrg free (tgt);
976 1.1.1.2 mrg return NULL;
977 1.1.1.2 mrg }
978 1.1 mrg
979 1.1 mrg for (i = 0; i < mapnum; i++)
980 1.1 mrg {
981 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
982 1.1.1.10 mrg bool implicit = get_implicit (short_mapkind, kinds, i);
983 1.1.1.2 mrg if (hostaddrs[i] == NULL
984 1.1.1.2 mrg || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
985 1.1.1.2 mrg {
986 1.1.1.2 mrg tgt->list[i].key = NULL;
987 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED;
988 1.1.1.2 mrg continue;
989 1.1.1.2 mrg }
990 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
991 1.1.1.8 mrg || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
992 1.1.1.2 mrg {
993 1.1.1.8 mrg tgt->list[i].key = NULL;
994 1.1.1.8 mrg if (!not_found_cnt)
995 1.1.1.2 mrg {
996 1.1.1.8 mrg /* In OpenMP < 5.0 and OpenACC the mapping has to be done
997 1.1.1.8 mrg on a separate construct prior to using use_device_{addr,ptr}.
998 1.1.1.8 mrg In OpenMP 5.0, map directives need to be ordered by the
999 1.1.1.8 mrg middle-end before the use_device_* clauses. If
1000 1.1.1.8 mrg !not_found_cnt, all mappings requested (if any) are already
1001 1.1.1.8 mrg mapped, so use_device_{addr,ptr} can be resolved right away.
1002 1.1.1.8 mrg Otherwise, if not_found_cnt, gomp_map_lookup might fail
1003 1.1.1.8 mrg now but would succeed after performing the mappings in the
1004 1.1.1.8 mrg following loop. We can't defer this always to the second
1005 1.1.1.8 mrg loop, because it is not even invoked when !not_found_cnt
1006 1.1.1.8 mrg after the first loop. */
1007 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1008 1.1.1.8 mrg cur_node.host_end = cur_node.host_start;
1009 1.1.1.8 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1010 1.1.1.8 mrg if (n != NULL)
1011 1.1.1.8 mrg {
1012 1.1.1.8 mrg cur_node.host_start -= n->host_start;
1013 1.1.1.8 mrg hostaddrs[i]
1014 1.1.1.8 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset
1015 1.1.1.8 mrg + cur_node.host_start);
1016 1.1.1.8 mrg }
1017 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1018 1.1.1.8 mrg {
1019 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
1020 1.1.1.8 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped");
1021 1.1.1.8 mrg }
1022 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1023 1.1.1.8 mrg /* If not present, continue using the host address. */
1024 1.1.1.8 mrg ;
1025 1.1.1.8 mrg else
1026 1.1.1.8 mrg __builtin_unreachable ();
1027 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED;
1028 1.1.1.2 mrg }
1029 1.1.1.8 mrg else
1030 1.1.1.8 mrg tgt->list[i].offset = 0;
1031 1.1.1.2 mrg continue;
1032 1.1.1.2 mrg }
1033 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_STRUCT)
1034 1.1.1.2 mrg {
1035 1.1.1.2 mrg size_t first = i + 1;
1036 1.1.1.2 mrg size_t last = i + sizes[i];
1037 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1038 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
1039 1.1.1.2 mrg + sizes[last];
1040 1.1.1.2 mrg tgt->list[i].key = NULL;
1041 1.1.1.8 mrg tgt->list[i].offset = OFFSET_STRUCT;
1042 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1043 1.1.1.2 mrg if (n == NULL)
1044 1.1.1.2 mrg {
1045 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
1046 1.1.1.2 mrg if (tgt_align < align)
1047 1.1.1.2 mrg tgt_align = align;
1048 1.1.1.6 mrg tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1049 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1050 1.1.1.6 mrg tgt_size += cur_node.host_end - cur_node.host_start;
1051 1.1.1.2 mrg not_found_cnt += last - i;
1052 1.1.1.2 mrg for (i = first; i <= last; i++)
1053 1.1.1.6 mrg {
1054 1.1.1.6 mrg tgt->list[i].key = NULL;
1055 1.1.1.10 mrg if (!aq
1056 1.1.1.10 mrg && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1057 1.1.1.10 mrg & typemask))
1058 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf,
1059 1.1.1.6 mrg tgt_size - cur_node.host_end
1060 1.1.1.6 mrg + (uintptr_t) hostaddrs[i],
1061 1.1.1.6 mrg sizes[i]);
1062 1.1.1.6 mrg }
1063 1.1.1.2 mrg i--;
1064 1.1.1.2 mrg continue;
1065 1.1.1.2 mrg }
1066 1.1.1.2 mrg for (i = first; i <= last; i++)
1067 1.1.1.8 mrg gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1068 1.1.1.10 mrg sizes, kinds, NULL, refcount_set);
1069 1.1.1.2 mrg i--;
1070 1.1.1.2 mrg continue;
1071 1.1.1.2 mrg }
1072 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1073 1.1 mrg {
1074 1.1.1.2 mrg tgt->list[i].key = NULL;
1075 1.1.1.8 mrg tgt->list[i].offset = OFFSET_POINTER;
1076 1.1.1.8 mrg has_firstprivate = true;
1077 1.1.1.8 mrg continue;
1078 1.1.1.8 mrg }
1079 1.1.1.10 mrg else if ((kind & typemask) == GOMP_MAP_ATTACH
1080 1.1.1.10 mrg || ((kind & typemask)
1081 1.1.1.10 mrg == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1082 1.1.1.8 mrg {
1083 1.1.1.8 mrg tgt->list[i].key = NULL;
1084 1.1.1.2 mrg has_firstprivate = true;
1085 1.1 mrg continue;
1086 1.1 mrg }
1087 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1088 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
1089 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
1090 1.1 mrg else
1091 1.1 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
1092 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1093 1.1.1.2 mrg {
1094 1.1.1.2 mrg tgt->list[i].key = NULL;
1095 1.1.1.2 mrg
1096 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
1097 1.1.1.2 mrg if (tgt_align < align)
1098 1.1.1.2 mrg tgt_align = align;
1099 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1100 1.1.1.10 mrg if (!aq)
1101 1.1.1.10 mrg gomp_coalesce_buf_add (&cbuf, tgt_size,
1102 1.1.1.10 mrg cur_node.host_end - cur_node.host_start);
1103 1.1.1.2 mrg tgt_size += cur_node.host_end - cur_node.host_start;
1104 1.1.1.2 mrg has_firstprivate = true;
1105 1.1.1.2 mrg continue;
1106 1.1.1.2 mrg }
1107 1.1.1.2 mrg splay_tree_key n;
1108 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1109 1.1 mrg {
1110 1.1.1.2 mrg n = gomp_map_0len_lookup (mem_map, &cur_node);
1111 1.1.1.2 mrg if (!n)
1112 1.1.1.2 mrg {
1113 1.1.1.2 mrg tgt->list[i].key = NULL;
1114 1.1.1.8 mrg tgt->list[i].offset = OFFSET_POINTER;
1115 1.1.1.2 mrg continue;
1116 1.1.1.2 mrg }
1117 1.1 mrg }
1118 1.1 mrg else
1119 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
1120 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
1121 1.1.1.10 mrg {
1122 1.1.1.10 mrg int always_to_cnt = 0;
1123 1.1.1.10 mrg if ((kind & typemask) == GOMP_MAP_TO_PSET)
1124 1.1.1.10 mrg {
1125 1.1.1.10 mrg bool has_nullptr = false;
1126 1.1.1.10 mrg size_t j;
1127 1.1.1.10 mrg for (j = 0; j < n->tgt->list_count; j++)
1128 1.1.1.10 mrg if (n->tgt->list[j].key == n)
1129 1.1.1.10 mrg {
1130 1.1.1.10 mrg has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1131 1.1.1.10 mrg break;
1132 1.1.1.10 mrg }
1133 1.1.1.10 mrg if (n->tgt->list_count == 0)
1134 1.1.1.10 mrg {
1135 1.1.1.10 mrg /* 'declare target'; assume has_nullptr; it could also be
1136 1.1.1.10 mrg statically assigned pointer, but that it should be to
1137 1.1.1.10 mrg the equivalent variable on the host. */
1138 1.1.1.10 mrg assert (n->refcount == REFCOUNT_INFINITY);
1139 1.1.1.10 mrg has_nullptr = true;
1140 1.1.1.10 mrg }
1141 1.1.1.10 mrg else
1142 1.1.1.10 mrg assert (j < n->tgt->list_count);
1143 1.1.1.10 mrg /* Re-map the data if there is an 'always' modifier or if it a
1144 1.1.1.10 mrg null pointer was there and non a nonnull has been found; that
1145 1.1.1.10 mrg permits transparent re-mapping for Fortran array descriptors
1146 1.1.1.10 mrg which were previously mapped unallocated. */
1147 1.1.1.10 mrg for (j = i + 1; j < mapnum; j++)
1148 1.1.1.10 mrg {
1149 1.1.1.10 mrg int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1150 1.1.1.10 mrg if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1151 1.1.1.10 mrg && (!has_nullptr
1152 1.1.1.10 mrg || !GOMP_MAP_POINTER_P (ptr_kind)
1153 1.1.1.10 mrg || *(void **) hostaddrs[j] == NULL))
1154 1.1.1.10 mrg break;
1155 1.1.1.10 mrg else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1156 1.1.1.10 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1157 1.1.1.10 mrg > cur_node.host_end))
1158 1.1.1.10 mrg break;
1159 1.1.1.10 mrg else
1160 1.1.1.10 mrg {
1161 1.1.1.10 mrg has_always_ptrset = true;
1162 1.1.1.10 mrg ++always_to_cnt;
1163 1.1.1.10 mrg }
1164 1.1.1.10 mrg }
1165 1.1.1.10 mrg }
1166 1.1.1.10 mrg gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1167 1.1.1.10 mrg kind & typemask, always_to_cnt > 0, implicit,
1168 1.1.1.10 mrg NULL, refcount_set);
1169 1.1.1.10 mrg i += always_to_cnt;
1170 1.1.1.10 mrg }
1171 1.1.1.2 mrg else
1172 1.1 mrg {
1173 1.1.1.2 mrg tgt->list[i].key = NULL;
1174 1.1 mrg
1175 1.1.1.8 mrg if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1176 1.1.1.8 mrg {
1177 1.1.1.8 mrg /* Not present, hence, skip entry - including its MAP_POINTER,
1178 1.1.1.8 mrg when existing. */
1179 1.1.1.8 mrg tgt->list[i].offset = OFFSET_POINTER;
1180 1.1.1.8 mrg if (i + 1 < mapnum
1181 1.1.1.8 mrg && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1182 1.1.1.8 mrg == GOMP_MAP_POINTER))
1183 1.1.1.8 mrg {
1184 1.1.1.8 mrg ++i;
1185 1.1.1.8 mrg tgt->list[i].key = NULL;
1186 1.1.1.8 mrg tgt->list[i].offset = 0;
1187 1.1.1.8 mrg }
1188 1.1.1.8 mrg continue;
1189 1.1.1.8 mrg }
1190 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
1191 1.1 mrg not_found_cnt++;
1192 1.1 mrg if (tgt_align < align)
1193 1.1 mrg tgt_align = align;
1194 1.1 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1195 1.1.1.10 mrg if (!aq
1196 1.1.1.10 mrg && gomp_to_device_kind_p (kind & typemask))
1197 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf, tgt_size,
1198 1.1.1.6 mrg cur_node.host_end - cur_node.host_start);
1199 1.1 mrg tgt_size += cur_node.host_end - cur_node.host_start;
1200 1.1 mrg if ((kind & typemask) == GOMP_MAP_TO_PSET)
1201 1.1 mrg {
1202 1.1 mrg size_t j;
1203 1.1.1.10 mrg int kind;
1204 1.1 mrg for (j = i + 1; j < mapnum; j++)
1205 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1206 1.1.1.10 mrg kinds, j)) & typemask))
1207 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1208 1.1 mrg break;
1209 1.1 mrg else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1210 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1211 1.1 mrg > cur_node.host_end))
1212 1.1 mrg break;
1213 1.1 mrg else
1214 1.1 mrg {
1215 1.1.1.2 mrg tgt->list[j].key = NULL;
1216 1.1 mrg i++;
1217 1.1 mrg }
1218 1.1 mrg }
1219 1.1 mrg }
1220 1.1 mrg }
1221 1.1 mrg
1222 1.1 mrg if (devaddrs)
1223 1.1 mrg {
1224 1.1 mrg if (mapnum != 1)
1225 1.1 mrg {
1226 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1227 1.1 mrg gomp_fatal ("unexpected aggregation");
1228 1.1 mrg }
1229 1.1 mrg tgt->to_free = devaddrs[0];
1230 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
1231 1.1 mrg tgt->tgt_end = tgt->tgt_start + sizes[0];
1232 1.1 mrg }
1233 1.1.1.2 mrg else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1234 1.1 mrg {
1235 1.1 mrg /* Allocate tgt_align aligned tgt_size block of memory. */
1236 1.1 mrg /* FIXME: Perhaps change interface to allocate properly aligned
1237 1.1 mrg memory. */
1238 1.1 mrg tgt->to_free = devicep->alloc_func (devicep->target_id,
1239 1.1 mrg tgt_size + tgt_align - 1);
1240 1.1.1.3 mrg if (!tgt->to_free)
1241 1.1.1.3 mrg {
1242 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
1243 1.1.1.3 mrg gomp_fatal ("device memory allocation fail");
1244 1.1.1.3 mrg }
1245 1.1.1.3 mrg
1246 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
1247 1.1 mrg tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1248 1.1 mrg tgt->tgt_end = tgt->tgt_start + tgt_size;
1249 1.1.1.6 mrg
1250 1.1.1.6 mrg if (cbuf.use_cnt == 1)
1251 1.1.1.6 mrg cbuf.chunk_cnt--;
1252 1.1.1.6 mrg if (cbuf.chunk_cnt > 0)
1253 1.1.1.6 mrg {
1254 1.1.1.6 mrg cbuf.buf
1255 1.1.1.7 mrg = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1256 1.1.1.6 mrg if (cbuf.buf)
1257 1.1.1.6 mrg {
1258 1.1.1.6 mrg cbuf.tgt = tgt;
1259 1.1.1.6 mrg cbufp = &cbuf;
1260 1.1.1.6 mrg }
1261 1.1.1.6 mrg }
1262 1.1 mrg }
1263 1.1 mrg else
1264 1.1 mrg {
1265 1.1 mrg tgt->to_free = NULL;
1266 1.1 mrg tgt->tgt_start = 0;
1267 1.1 mrg tgt->tgt_end = 0;
1268 1.1 mrg }
1269 1.1 mrg
1270 1.1 mrg tgt_size = 0;
1271 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
1272 1.1 mrg tgt_size = mapnum * sizeof (void *);
1273 1.1 mrg
1274 1.1 mrg tgt->array = NULL;
1275 1.1.1.10 mrg if (not_found_cnt || has_firstprivate || has_always_ptrset)
1276 1.1 mrg {
1277 1.1.1.2 mrg if (not_found_cnt)
1278 1.1.1.2 mrg tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1279 1.1 mrg splay_tree_node array = tgt->array;
1280 1.1.1.10 mrg size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1281 1.1.1.2 mrg uintptr_t field_tgt_base = 0;
1282 1.1.1.10 mrg splay_tree_key field_tgt_structelem_first = NULL;
1283 1.1 mrg
1284 1.1 mrg for (i = 0; i < mapnum; i++)
1285 1.1.1.10 mrg if (has_always_ptrset
1286 1.1.1.10 mrg && tgt->list[i].key
1287 1.1.1.10 mrg && (get_kind (short_mapkind, kinds, i) & typemask)
1288 1.1.1.10 mrg == GOMP_MAP_TO_PSET)
1289 1.1.1.10 mrg {
1290 1.1.1.10 mrg splay_tree_key k = tgt->list[i].key;
1291 1.1.1.10 mrg bool has_nullptr = false;
1292 1.1.1.10 mrg size_t j;
1293 1.1.1.10 mrg for (j = 0; j < k->tgt->list_count; j++)
1294 1.1.1.10 mrg if (k->tgt->list[j].key == k)
1295 1.1.1.10 mrg {
1296 1.1.1.10 mrg has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1297 1.1.1.10 mrg break;
1298 1.1.1.10 mrg }
1299 1.1.1.10 mrg if (k->tgt->list_count == 0)
1300 1.1.1.10 mrg has_nullptr = true;
1301 1.1.1.10 mrg else
1302 1.1.1.10 mrg assert (j < k->tgt->list_count);
1303 1.1.1.10 mrg
1304 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc = false;
1305 1.1.1.10 mrg for (j = i + 1; j < mapnum; j++)
1306 1.1.1.10 mrg {
1307 1.1.1.10 mrg int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1308 1.1.1.10 mrg if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1309 1.1.1.10 mrg && (!has_nullptr
1310 1.1.1.10 mrg || !GOMP_MAP_POINTER_P (ptr_kind)
1311 1.1.1.10 mrg || *(void **) hostaddrs[j] == NULL))
1312 1.1.1.10 mrg break;
1313 1.1.1.10 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start
1314 1.1.1.10 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1315 1.1.1.10 mrg > k->host_end))
1316 1.1.1.10 mrg break;
1317 1.1.1.10 mrg else
1318 1.1.1.10 mrg {
1319 1.1.1.10 mrg if (*(void **) hostaddrs[j] == NULL)
1320 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc = true;
1321 1.1.1.10 mrg tgt->list[j].key = k;
1322 1.1.1.10 mrg tgt->list[j].copy_from = false;
1323 1.1.1.10 mrg tgt->list[j].always_copy_from = false;
1324 1.1.1.10 mrg tgt->list[j].is_attach = false;
1325 1.1.1.10 mrg gomp_increment_refcount (k, refcount_set);
1326 1.1.1.10 mrg gomp_map_pointer (k->tgt, aq,
1327 1.1.1.10 mrg (uintptr_t) *(void **) hostaddrs[j],
1328 1.1.1.10 mrg k->tgt_offset + ((uintptr_t) hostaddrs[j]
1329 1.1.1.10 mrg - k->host_start),
1330 1.1.1.10 mrg sizes[j], cbufp, false);
1331 1.1.1.10 mrg }
1332 1.1.1.10 mrg }
1333 1.1.1.10 mrg i = j - 1;
1334 1.1.1.10 mrg }
1335 1.1.1.10 mrg else if (tgt->list[i].key == NULL)
1336 1.1 mrg {
1337 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
1338 1.1.1.10 mrg bool implicit = get_implicit (short_mapkind, kinds, i);
1339 1.1 mrg if (hostaddrs[i] == NULL)
1340 1.1 mrg continue;
1341 1.1.1.2 mrg switch (kind & typemask)
1342 1.1.1.2 mrg {
1343 1.1.1.2 mrg size_t align, len, first, last;
1344 1.1.1.2 mrg splay_tree_key n;
1345 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE:
1346 1.1.1.2 mrg align = (size_t) 1 << (kind >> rshift);
1347 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1348 1.1.1.2 mrg tgt->list[i].offset = tgt_size;
1349 1.1.1.2 mrg len = sizes[i];
1350 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1351 1.1.1.3 mrg (void *) (tgt->tgt_start + tgt_size),
1352 1.1.1.10 mrg (void *) hostaddrs[i], len, false, cbufp);
1353 1.1.1.2 mrg tgt_size += len;
1354 1.1.1.2 mrg continue;
1355 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE_INT:
1356 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1357 1.1.1.2 mrg continue;
1358 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1359 1.1.1.8 mrg /* The OpenACC 'host_data' construct only allows 'use_device'
1360 1.1.1.8 mrg "mapping" clauses, so in the first loop, 'not_found_cnt'
1361 1.1.1.8 mrg must always have been zero, so all OpenACC 'use_device'
1362 1.1.1.8 mrg clauses have already been handled. (We can only easily test
1363 1.1.1.8 mrg 'use_device' with 'if_present' clause here.) */
1364 1.1.1.8 mrg assert (tgt->list[i].offset == OFFSET_INLINED);
1365 1.1.1.8 mrg /* Nevertheless, FALLTHRU to the normal handling, to keep the
1366 1.1.1.8 mrg code conceptually simple, similar to the first loop. */
1367 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR:
1368 1.1.1.8 mrg if (tgt->list[i].offset == 0)
1369 1.1.1.8 mrg {
1370 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1371 1.1.1.8 mrg cur_node.host_end = cur_node.host_start;
1372 1.1.1.8 mrg n = gomp_map_lookup (mem_map, &cur_node);
1373 1.1.1.8 mrg if (n != NULL)
1374 1.1.1.8 mrg {
1375 1.1.1.8 mrg cur_node.host_start -= n->host_start;
1376 1.1.1.8 mrg hostaddrs[i]
1377 1.1.1.8 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset
1378 1.1.1.8 mrg + cur_node.host_start);
1379 1.1.1.8 mrg }
1380 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1381 1.1.1.8 mrg {
1382 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
1383 1.1.1.8 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped");
1384 1.1.1.8 mrg }
1385 1.1.1.8 mrg else if ((kind & typemask)
1386 1.1.1.8 mrg == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1387 1.1.1.8 mrg /* If not present, continue using the host address. */
1388 1.1.1.8 mrg ;
1389 1.1.1.8 mrg else
1390 1.1.1.8 mrg __builtin_unreachable ();
1391 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED;
1392 1.1.1.8 mrg }
1393 1.1.1.8 mrg continue;
1394 1.1.1.2 mrg case GOMP_MAP_STRUCT:
1395 1.1.1.2 mrg first = i + 1;
1396 1.1.1.2 mrg last = i + sizes[i];
1397 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1398 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
1399 1.1.1.2 mrg + sizes[last];
1400 1.1.1.2 mrg if (tgt->list[first].key != NULL)
1401 1.1.1.2 mrg continue;
1402 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
1403 1.1.1.2 mrg if (n == NULL)
1404 1.1.1.2 mrg {
1405 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
1406 1.1.1.2 mrg tgt_size -= (uintptr_t) hostaddrs[first]
1407 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
1408 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1409 1.1.1.2 mrg tgt_size += (uintptr_t) hostaddrs[first]
1410 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
1411 1.1.1.2 mrg field_tgt_base = (uintptr_t) hostaddrs[first];
1412 1.1.1.2 mrg field_tgt_offset = tgt_size;
1413 1.1.1.2 mrg field_tgt_clear = last;
1414 1.1.1.10 mrg field_tgt_structelem_first = NULL;
1415 1.1.1.2 mrg tgt_size += cur_node.host_end
1416 1.1.1.2 mrg - (uintptr_t) hostaddrs[first];
1417 1.1.1.2 mrg continue;
1418 1.1.1.2 mrg }
1419 1.1.1.2 mrg for (i = first; i <= last; i++)
1420 1.1.1.8 mrg gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1421 1.1.1.10 mrg sizes, kinds, cbufp, refcount_set);
1422 1.1.1.2 mrg i--;
1423 1.1.1.2 mrg continue;
1424 1.1.1.2 mrg case GOMP_MAP_ALWAYS_POINTER:
1425 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1426 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
1427 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
1428 1.1.1.2 mrg if (n == NULL
1429 1.1.1.2 mrg || n->host_start > cur_node.host_start
1430 1.1.1.2 mrg || n->host_end < cur_node.host_end)
1431 1.1.1.2 mrg {
1432 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1433 1.1.1.2 mrg gomp_fatal ("always pointer not mapped");
1434 1.1.1.2 mrg }
1435 1.1.1.2 mrg if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1436 1.1.1.2 mrg != GOMP_MAP_ALWAYS_POINTER)
1437 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1438 1.1.1.2 mrg if (cur_node.tgt_offset)
1439 1.1.1.2 mrg cur_node.tgt_offset -= sizes[i];
1440 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1441 1.1.1.3 mrg (void *) (n->tgt->tgt_start
1442 1.1.1.3 mrg + n->tgt_offset
1443 1.1.1.3 mrg + cur_node.host_start
1444 1.1.1.3 mrg - n->host_start),
1445 1.1.1.3 mrg (void *) &cur_node.tgt_offset,
1446 1.1.1.10 mrg sizeof (void *), true, cbufp);
1447 1.1.1.2 mrg cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1448 1.1.1.2 mrg + cur_node.host_start - n->host_start;
1449 1.1.1.2 mrg continue;
1450 1.1.1.8 mrg case GOMP_MAP_IF_PRESENT:
1451 1.1.1.8 mrg /* Not present - otherwise handled above. Skip over its
1452 1.1.1.8 mrg MAP_POINTER as well. */
1453 1.1.1.8 mrg if (i + 1 < mapnum
1454 1.1.1.8 mrg && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1455 1.1.1.8 mrg == GOMP_MAP_POINTER))
1456 1.1.1.8 mrg ++i;
1457 1.1.1.8 mrg continue;
1458 1.1.1.8 mrg case GOMP_MAP_ATTACH:
1459 1.1.1.10 mrg case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1460 1.1.1.8 mrg {
1461 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1462 1.1.1.8 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
1463 1.1.1.8 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1464 1.1.1.8 mrg if (n != NULL)
1465 1.1.1.8 mrg {
1466 1.1.1.8 mrg tgt->list[i].key = n;
1467 1.1.1.8 mrg tgt->list[i].offset = cur_node.host_start - n->host_start;
1468 1.1.1.8 mrg tgt->list[i].length = n->host_end - n->host_start;
1469 1.1.1.8 mrg tgt->list[i].copy_from = false;
1470 1.1.1.8 mrg tgt->list[i].always_copy_from = false;
1471 1.1.1.8 mrg tgt->list[i].is_attach = true;
1472 1.1.1.8 mrg /* OpenACC 'attach'/'detach' doesn't affect
1473 1.1.1.8 mrg structured/dynamic reference counts ('n->refcount',
1474 1.1.1.8 mrg 'n->dynamic_refcount'). */
1475 1.1.1.10 mrg
1476 1.1.1.10 mrg bool zlas
1477 1.1.1.10 mrg = ((kind & typemask)
1478 1.1.1.10 mrg == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1479 1.1.1.10 mrg gomp_attach_pointer (devicep, aq, mem_map, n,
1480 1.1.1.10 mrg (uintptr_t) hostaddrs[i], sizes[i],
1481 1.1.1.10 mrg cbufp, zlas);
1482 1.1.1.8 mrg }
1483 1.1.1.10 mrg else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1484 1.1.1.8 mrg {
1485 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
1486 1.1.1.8 mrg gomp_fatal ("outer struct not mapped for attach");
1487 1.1.1.8 mrg }
1488 1.1.1.8 mrg continue;
1489 1.1.1.8 mrg }
1490 1.1.1.2 mrg default:
1491 1.1.1.2 mrg break;
1492 1.1.1.2 mrg }
1493 1.1 mrg splay_tree_key k = &array->key;
1494 1.1 mrg k->host_start = (uintptr_t) hostaddrs[i];
1495 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
1496 1.1 mrg k->host_end = k->host_start + sizes[i];
1497 1.1 mrg else
1498 1.1 mrg k->host_end = k->host_start + sizeof (void *);
1499 1.1 mrg splay_tree_key n = splay_tree_lookup (mem_map, k);
1500 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
1501 1.1.1.8 mrg gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1502 1.1.1.10 mrg kind & typemask, false, implicit, cbufp,
1503 1.1.1.10 mrg refcount_set);
1504 1.1 mrg else
1505 1.1 mrg {
1506 1.1.1.8 mrg k->aux = NULL;
1507 1.1.1.2 mrg if (n && n->refcount == REFCOUNT_LINK)
1508 1.1.1.2 mrg {
1509 1.1.1.2 mrg /* Replace target address of the pointer with target address
1510 1.1.1.2 mrg of mapped object in the splay tree. */
1511 1.1.1.2 mrg splay_tree_remove (mem_map, n);
1512 1.1.1.8 mrg k->aux
1513 1.1.1.8 mrg = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1514 1.1.1.8 mrg k->aux->link_key = n;
1515 1.1.1.2 mrg }
1516 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
1517 1.1.1.2 mrg tgt->list[i].key = k;
1518 1.1 mrg k->tgt = tgt;
1519 1.1.1.10 mrg k->refcount = 0;
1520 1.1.1.10 mrg k->dynamic_refcount = 0;
1521 1.1.1.8 mrg if (field_tgt_clear != FIELD_TGT_EMPTY)
1522 1.1.1.2 mrg {
1523 1.1.1.2 mrg k->tgt_offset = k->host_start - field_tgt_base
1524 1.1.1.2 mrg + field_tgt_offset;
1525 1.1.1.10 mrg if (openmp_p)
1526 1.1.1.10 mrg {
1527 1.1.1.10 mrg k->refcount = REFCOUNT_STRUCTELEM;
1528 1.1.1.10 mrg if (field_tgt_structelem_first == NULL)
1529 1.1.1.10 mrg {
1530 1.1.1.10 mrg /* Set to first structure element of sequence. */
1531 1.1.1.10 mrg k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1532 1.1.1.10 mrg field_tgt_structelem_first = k;
1533 1.1.1.10 mrg }
1534 1.1.1.10 mrg else
1535 1.1.1.10 mrg /* Point to refcount of leading element, but do not
1536 1.1.1.10 mrg increment again. */
1537 1.1.1.10 mrg k->structelem_refcount_ptr
1538 1.1.1.10 mrg = &field_tgt_structelem_first->structelem_refcount;
1539 1.1.1.10 mrg
1540 1.1.1.10 mrg if (i == field_tgt_clear)
1541 1.1.1.10 mrg {
1542 1.1.1.10 mrg k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1543 1.1.1.10 mrg field_tgt_structelem_first = NULL;
1544 1.1.1.10 mrg }
1545 1.1.1.10 mrg }
1546 1.1.1.2 mrg if (i == field_tgt_clear)
1547 1.1.1.8 mrg field_tgt_clear = FIELD_TGT_EMPTY;
1548 1.1.1.2 mrg }
1549 1.1.1.2 mrg else
1550 1.1.1.2 mrg {
1551 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1552 1.1.1.2 mrg k->tgt_offset = tgt_size;
1553 1.1.1.2 mrg tgt_size += k->host_end - k->host_start;
1554 1.1.1.2 mrg }
1555 1.1.1.10 mrg /* First increment, from 0 to 1. gomp_increment_refcount
1556 1.1.1.10 mrg encapsulates the different increment cases, so use this
1557 1.1.1.10 mrg instead of directly setting 1 during initialization. */
1558 1.1.1.10 mrg gomp_increment_refcount (k, refcount_set);
1559 1.1.1.10 mrg
1560 1.1.1.2 mrg tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1561 1.1.1.2 mrg tgt->list[i].always_copy_from
1562 1.1.1.2 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1563 1.1.1.8 mrg tgt->list[i].is_attach = false;
1564 1.1.1.2 mrg tgt->list[i].offset = 0;
1565 1.1.1.2 mrg tgt->list[i].length = k->host_end - k->host_start;
1566 1.1 mrg tgt->refcount++;
1567 1.1 mrg array->left = NULL;
1568 1.1 mrg array->right = NULL;
1569 1.1 mrg splay_tree_insert (mem_map, array);
1570 1.1 mrg switch (kind & typemask)
1571 1.1 mrg {
1572 1.1 mrg case GOMP_MAP_ALLOC:
1573 1.1 mrg case GOMP_MAP_FROM:
1574 1.1 mrg case GOMP_MAP_FORCE_ALLOC:
1575 1.1 mrg case GOMP_MAP_FORCE_FROM:
1576 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
1577 1.1 mrg break;
1578 1.1 mrg case GOMP_MAP_TO:
1579 1.1 mrg case GOMP_MAP_TOFROM:
1580 1.1 mrg case GOMP_MAP_FORCE_TO:
1581 1.1 mrg case GOMP_MAP_FORCE_TOFROM:
1582 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TO:
1583 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TOFROM:
1584 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1585 1.1.1.3 mrg (void *) (tgt->tgt_start
1586 1.1.1.3 mrg + k->tgt_offset),
1587 1.1.1.3 mrg (void *) k->host_start,
1588 1.1.1.10 mrg k->host_end - k->host_start,
1589 1.1.1.10 mrg false, cbufp);
1590 1.1 mrg break;
1591 1.1 mrg case GOMP_MAP_POINTER:
1592 1.1.1.10 mrg case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1593 1.1.1.10 mrg gomp_map_pointer
1594 1.1.1.10 mrg (tgt, aq, (uintptr_t) *(void **) k->host_start,
1595 1.1.1.10 mrg k->tgt_offset, sizes[i], cbufp,
1596 1.1.1.10 mrg ((kind & typemask)
1597 1.1.1.10 mrg == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1598 1.1 mrg break;
1599 1.1 mrg case GOMP_MAP_TO_PSET:
1600 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1601 1.1.1.3 mrg (void *) (tgt->tgt_start
1602 1.1.1.3 mrg + k->tgt_offset),
1603 1.1.1.3 mrg (void *) k->host_start,
1604 1.1.1.10 mrg k->host_end - k->host_start,
1605 1.1.1.10 mrg false, cbufp);
1606 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc = false;
1607 1.1 mrg
1608 1.1 mrg for (j = i + 1; j < mapnum; j++)
1609 1.1.1.10 mrg {
1610 1.1.1.10 mrg int ptr_kind = (get_kind (short_mapkind, kinds, j)
1611 1.1.1.10 mrg & typemask);
1612 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P (ptr_kind)
1613 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1614 1.1.1.10 mrg break;
1615 1.1.1.10 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start
1616 1.1.1.10 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1617 1.1.1.10 mrg > k->host_end))
1618 1.1.1.10 mrg break;
1619 1.1.1.10 mrg else
1620 1.1.1.10 mrg {
1621 1.1.1.10 mrg tgt->list[j].key = k;
1622 1.1.1.10 mrg tgt->list[j].copy_from = false;
1623 1.1.1.10 mrg tgt->list[j].always_copy_from = false;
1624 1.1.1.10 mrg tgt->list[j].is_attach = false;
1625 1.1.1.10 mrg tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1626 1.1.1.10 mrg /* For OpenMP, the use of refcount_sets causes
1627 1.1.1.10 mrg errors if we set k->refcount = 1 above but also
1628 1.1.1.10 mrg increment it again here, for decrementing will
1629 1.1.1.10 mrg not properly match, since we decrement only once
1630 1.1.1.10 mrg for each key's refcount. Therefore avoid this
1631 1.1.1.10 mrg increment for OpenMP constructs. */
1632 1.1.1.10 mrg if (!openmp_p)
1633 1.1.1.10 mrg gomp_increment_refcount (k, refcount_set);
1634 1.1.1.10 mrg gomp_map_pointer (tgt, aq,
1635 1.1.1.10 mrg (uintptr_t) *(void **) hostaddrs[j],
1636 1.1.1.10 mrg k->tgt_offset
1637 1.1.1.10 mrg + ((uintptr_t) hostaddrs[j]
1638 1.1.1.10 mrg - k->host_start),
1639 1.1.1.10 mrg sizes[j], cbufp, false);
1640 1.1.1.10 mrg }
1641 1.1 mrg }
1642 1.1.1.10 mrg i = j - 1;
1643 1.1 mrg break;
1644 1.1 mrg case GOMP_MAP_FORCE_PRESENT:
1645 1.1 mrg {
1646 1.1 mrg /* We already looked up the memory region above and it
1647 1.1 mrg was missing. */
1648 1.1 mrg size_t size = k->host_end - k->host_start;
1649 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1650 1.1 mrg #ifdef HAVE_INTTYPES_H
1651 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
1652 1.1 mrg "%"PRIu64" (0x%"PRIx64"))",
1653 1.1 mrg (void *) k->host_start,
1654 1.1 mrg (uint64_t) size, (uint64_t) size);
1655 1.1 mrg #else
1656 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
1657 1.1 mrg "%lu (0x%lx))", (void *) k->host_start,
1658 1.1 mrg (unsigned long) size, (unsigned long) size);
1659 1.1 mrg #endif
1660 1.1 mrg }
1661 1.1 mrg break;
1662 1.1 mrg case GOMP_MAP_FORCE_DEVICEPTR:
1663 1.1 mrg assert (k->host_end - k->host_start == sizeof (void *));
1664 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1665 1.1.1.3 mrg (void *) (tgt->tgt_start
1666 1.1.1.3 mrg + k->tgt_offset),
1667 1.1.1.3 mrg (void *) k->host_start,
1668 1.1.1.10 mrg sizeof (void *), false, cbufp);
1669 1.1 mrg break;
1670 1.1 mrg default:
1671 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1672 1.1 mrg gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1673 1.1 mrg kind);
1674 1.1 mrg }
1675 1.1.1.2 mrg
1676 1.1.1.8 mrg if (k->aux && k->aux->link_key)
1677 1.1.1.2 mrg {
1678 1.1.1.2 mrg /* Set link pointer on target to the device address of the
1679 1.1.1.2 mrg mapped object. */
1680 1.1.1.2 mrg void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1681 1.1.1.7 mrg /* We intentionally do not use coalescing here, as it's not
1682 1.1.1.7 mrg data allocated by the current call to this function. */
1683 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1684 1.1.1.10 mrg &tgt_addr, sizeof (void *), true, NULL);
1685 1.1.1.2 mrg }
1686 1.1 mrg array++;
1687 1.1 mrg }
1688 1.1 mrg }
1689 1.1 mrg }
1690 1.1 mrg
1691 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
1692 1.1 mrg {
1693 1.1 mrg for (i = 0; i < mapnum; i++)
1694 1.1 mrg {
1695 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1696 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1697 1.1.1.3 mrg (void *) (tgt->tgt_start + i * sizeof (void *)),
1698 1.1.1.6 mrg (void *) &cur_node.tgt_offset, sizeof (void *),
1699 1.1.1.10 mrg true, cbufp);
1700 1.1 mrg }
1701 1.1 mrg }
1702 1.1 mrg
1703 1.1.1.6 mrg if (cbufp)
1704 1.1.1.6 mrg {
1705 1.1.1.10 mrg /* See 'gomp_coalesce_buf_add'. */
1706 1.1.1.10 mrg assert (!aq);
1707 1.1.1.10 mrg
1708 1.1.1.6 mrg long c = 0;
1709 1.1.1.6 mrg for (c = 0; c < cbuf.chunk_cnt; ++c)
1710 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1711 1.1.1.7 mrg (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1712 1.1.1.7 mrg (char *) cbuf.buf + (cbuf.chunks[c].start
1713 1.1.1.7 mrg - cbuf.chunks[0].start),
1714 1.1.1.10 mrg cbuf.chunks[c].end - cbuf.chunks[c].start,
1715 1.1.1.10 mrg true, NULL);
1716 1.1.1.6 mrg free (cbuf.buf);
1717 1.1.1.7 mrg cbuf.buf = NULL;
1718 1.1.1.7 mrg cbufp = NULL;
1719 1.1.1.6 mrg }
1720 1.1.1.6 mrg
1721 1.1.1.2 mrg /* If the variable from "omp target enter data" map-list was already mapped,
1722 1.1.1.2 mrg tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1723 1.1.1.2 mrg gomp_exit_data. */
1724 1.1.1.10 mrg if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1725 1.1.1.2 mrg {
1726 1.1.1.2 mrg free (tgt);
1727 1.1.1.2 mrg tgt = NULL;
1728 1.1.1.2 mrg }
1729 1.1.1.2 mrg
1730 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1731 1.1 mrg return tgt;
1732 1.1 mrg }
1733 1.1 mrg
1734 1.1.1.10 mrg static struct target_mem_desc *
1735 1.1.1.8 mrg gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1736 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1737 1.1.1.10 mrg bool short_mapkind, htab_t *refcount_set,
1738 1.1.1.10 mrg enum gomp_map_vars_kind pragma_kind)
1739 1.1.1.8 mrg {
1740 1.1.1.10 mrg /* This management of a local refcount_set is for convenience of callers
1741 1.1.1.10 mrg who do not share a refcount_set over multiple map/unmap uses. */
1742 1.1.1.10 mrg htab_t local_refcount_set = NULL;
1743 1.1.1.10 mrg if (refcount_set == NULL)
1744 1.1.1.10 mrg {
1745 1.1.1.10 mrg local_refcount_set = htab_create (mapnum);
1746 1.1.1.10 mrg refcount_set = &local_refcount_set;
1747 1.1.1.10 mrg }
1748 1.1.1.10 mrg
1749 1.1.1.10 mrg struct target_mem_desc *tgt;
1750 1.1.1.10 mrg tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1751 1.1.1.10 mrg sizes, kinds, short_mapkind, refcount_set,
1752 1.1.1.10 mrg pragma_kind);
1753 1.1.1.10 mrg if (local_refcount_set)
1754 1.1.1.10 mrg htab_free (local_refcount_set);
1755 1.1.1.10 mrg
1756 1.1.1.10 mrg return tgt;
1757 1.1.1.8 mrg }
1758 1.1.1.8 mrg
1759 1.1.1.8 mrg attribute_hidden struct target_mem_desc *
1760 1.1.1.10 mrg goacc_map_vars (struct gomp_device_descr *devicep,
1761 1.1.1.10 mrg struct goacc_asyncqueue *aq, size_t mapnum,
1762 1.1.1.10 mrg void **hostaddrs, void **devaddrs, size_t *sizes,
1763 1.1.1.10 mrg void *kinds, bool short_mapkind,
1764 1.1.1.10 mrg enum gomp_map_vars_kind pragma_kind)
1765 1.1.1.8 mrg {
1766 1.1.1.8 mrg return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1767 1.1.1.10 mrg sizes, kinds, short_mapkind, NULL,
1768 1.1.1.10 mrg GOMP_MAP_VARS_OPENACC | pragma_kind);
1769 1.1.1.8 mrg }
1770 1.1.1.8 mrg
1771 1.1 mrg static void
1772 1.1 mrg gomp_unmap_tgt (struct target_mem_desc *tgt)
1773 1.1 mrg {
1774 1.1 mrg /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1775 1.1 mrg if (tgt->tgt_end)
1776 1.1.1.3 mrg gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1777 1.1 mrg
1778 1.1 mrg free (tgt->array);
1779 1.1 mrg free (tgt);
1780 1.1 mrg }
1781 1.1 mrg
1782 1.1.1.8 mrg static bool
1783 1.1.1.8 mrg gomp_unref_tgt (void *ptr)
1784 1.1.1.7 mrg {
1785 1.1.1.7 mrg bool is_tgt_unmapped = false;
1786 1.1.1.8 mrg
1787 1.1.1.8 mrg struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1788 1.1.1.8 mrg
1789 1.1.1.8 mrg if (tgt->refcount > 1)
1790 1.1.1.8 mrg tgt->refcount--;
1791 1.1.1.7 mrg else
1792 1.1.1.7 mrg {
1793 1.1.1.8 mrg gomp_unmap_tgt (tgt);
1794 1.1.1.7 mrg is_tgt_unmapped = true;
1795 1.1.1.7 mrg }
1796 1.1.1.8 mrg
1797 1.1.1.8 mrg return is_tgt_unmapped;
1798 1.1.1.8 mrg }
1799 1.1.1.8 mrg
/* void-returning wrapper around gomp_unref_tgt, suitable for use as an
   asyncqueue callback; the "was it unmapped" result is discarded.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1805 1.1.1.8 mrg
1806 1.1.1.10 mrg static void
1807 1.1.1.10 mrg gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1808 1.1.1.8 mrg {
1809 1.1.1.10 mrg splay_tree_remove (sp, k);
1810 1.1.1.8 mrg if (k->aux)
1811 1.1.1.8 mrg {
1812 1.1.1.8 mrg if (k->aux->link_key)
1813 1.1.1.10 mrg splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1814 1.1.1.8 mrg if (k->aux->attach_count)
1815 1.1.1.8 mrg free (k->aux->attach_count);
1816 1.1.1.8 mrg free (k->aux);
1817 1.1.1.8 mrg k->aux = NULL;
1818 1.1.1.8 mrg }
1819 1.1.1.10 mrg }
1820 1.1.1.10 mrg
/* Remove splay tree key K from DEVICEP's memory map and drop one reference
   on the owning target_mem_desc.  For a structure-element key, all sibling
   element keys of the same struct mapping are removed as a group.  If AQ is
   non-NULL the unreference is queued asynchronously and false is returned;
   otherwise the return value says whether the target_mem_desc was actually
   unmapped.  The device lock must be held by the caller.  */

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  Siblings of the same
	 struct mapping therefore sit in consecutive nodes of that array.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.  */
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  /* With an async queue, keep the target_mem_desc alive until the queued
     device work has completed; the callback drops the reference then.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
1863 1.1.1.7 mrg
1864 1.1.1.8 mrg attribute_hidden bool
1865 1.1.1.8 mrg gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1866 1.1.1.8 mrg {
1867 1.1.1.8 mrg return gomp_remove_var_internal (devicep, k, NULL);
1868 1.1.1.8 mrg }
1869 1.1.1.8 mrg
1870 1.1.1.8 mrg /* Remove a variable asynchronously. This actually removes the variable
1871 1.1.1.8 mrg mapping immediately, but retains the linked target_mem_desc until the
1872 1.1.1.8 mrg asynchronous operation has completed (as it may still refer to target
1873 1.1.1.8 mrg memory). The device lock must be held before entry, and remains locked on
1874 1.1.1.8 mrg exit. */
1875 1.1.1.8 mrg
1876 1.1.1.8 mrg attribute_hidden void
1877 1.1.1.8 mrg gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1878 1.1.1.8 mrg struct goacc_asyncqueue *aq)
1879 1.1.1.8 mrg {
1880 1.1.1.8 mrg (void) gomp_remove_var_internal (devicep, k, aq);
1881 1.1.1.8 mrg }
1882 1.1.1.8 mrg
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  REFCOUNT_SET tracks keys whose reference count was
   already adjusted during this map/unmap sequence; AQ, if non-NULL, queues
   the device-side work asynchronously.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* Nothing was mapped; only the descriptor itself needs freeing.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down: no device-side work is possible; just
	 release the host-side bookkeeping.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      /* Decrement the key's refcount; DO_COPY/DO_REMOVE report whether this
	 was the last structured reference (triggering copy-back) and whether
	 the mapping should be removed entirely.  */
      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  /* Finally drop this unmap's own reference on TGT; with an async queue the
     descriptor must survive until the queued work completes.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
1962 1.1 mrg
1963 1.1.1.10 mrg static void
1964 1.1.1.10 mrg gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1965 1.1.1.10 mrg htab_t *refcount_set)
1966 1.1.1.8 mrg {
1967 1.1.1.10 mrg /* This management of a local refcount_set is for convenience of callers
1968 1.1.1.10 mrg who do not share a refcount_set over multiple map/unmap uses. */
1969 1.1.1.10 mrg htab_t local_refcount_set = NULL;
1970 1.1.1.10 mrg if (refcount_set == NULL)
1971 1.1.1.10 mrg {
1972 1.1.1.10 mrg local_refcount_set = htab_create (tgt->list_count);
1973 1.1.1.10 mrg refcount_set = &local_refcount_set;
1974 1.1.1.10 mrg }
1975 1.1.1.10 mrg
1976 1.1.1.10 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
1977 1.1.1.10 mrg
1978 1.1.1.10 mrg if (local_refcount_set)
1979 1.1.1.10 mrg htab_free (local_refcount_set);
1980 1.1.1.8 mrg }
1981 1.1.1.8 mrg
1982 1.1.1.8 mrg attribute_hidden void
1983 1.1.1.10 mrg goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1984 1.1.1.10 mrg struct goacc_asyncqueue *aq)
1985 1.1.1.8 mrg {
1986 1.1.1.10 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1987 1.1.1.8 mrg }
1988 1.1.1.8 mrg
1989 1.1 mrg static void
1990 1.1 mrg gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1991 1.1.1.2 mrg size_t *sizes, void *kinds, bool short_mapkind)
1992 1.1 mrg {
1993 1.1 mrg size_t i;
1994 1.1 mrg struct splay_tree_key_s cur_node;
1995 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
1996 1.1 mrg
1997 1.1 mrg if (!devicep)
1998 1.1 mrg return;
1999 1.1 mrg
2000 1.1 mrg if (mapnum == 0)
2001 1.1 mrg return;
2002 1.1 mrg
2003 1.1 mrg gomp_mutex_lock (&devicep->lock);
2004 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
2005 1.1.1.2 mrg {
2006 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2007 1.1.1.2 mrg return;
2008 1.1.1.2 mrg }
2009 1.1.1.2 mrg
2010 1.1 mrg for (i = 0; i < mapnum; i++)
2011 1.1 mrg if (sizes[i])
2012 1.1 mrg {
2013 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
2014 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
2015 1.1 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2016 1.1 mrg if (n)
2017 1.1 mrg {
2018 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
2019 1.1 mrg if (n->host_start > cur_node.host_start
2020 1.1 mrg || n->host_end < cur_node.host_end)
2021 1.1 mrg {
2022 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2023 1.1 mrg gomp_fatal ("Trying to update [%p..%p) object when "
2024 1.1 mrg "only [%p..%p) is mapped",
2025 1.1 mrg (void *) cur_node.host_start,
2026 1.1 mrg (void *) cur_node.host_end,
2027 1.1 mrg (void *) n->host_start,
2028 1.1 mrg (void *) n->host_end);
2029 1.1 mrg }
2030 1.1.1.3 mrg
2031 1.1.1.10 mrg if (n->aux && n->aux->attach_count)
2032 1.1.1.10 mrg {
2033 1.1.1.10 mrg uintptr_t addr = cur_node.host_start;
2034 1.1.1.10 mrg while (addr < cur_node.host_end)
2035 1.1.1.10 mrg {
2036 1.1.1.10 mrg /* We have to be careful not to overwrite still attached
2037 1.1.1.10 mrg pointers during host<->device updates. */
2038 1.1.1.10 mrg size_t i = (addr - cur_node.host_start) / sizeof (void *);
2039 1.1.1.10 mrg if (n->aux->attach_count[i] == 0)
2040 1.1.1.10 mrg {
2041 1.1.1.10 mrg void *devaddr = (void *) (n->tgt->tgt_start
2042 1.1.1.10 mrg + n->tgt_offset
2043 1.1.1.10 mrg + addr - n->host_start);
2044 1.1.1.10 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask))
2045 1.1.1.10 mrg gomp_copy_host2dev (devicep, NULL,
2046 1.1.1.10 mrg devaddr, (void *) addr,
2047 1.1.1.10 mrg sizeof (void *), false, NULL);
2048 1.1.1.10 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2049 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL,
2050 1.1.1.10 mrg (void *) addr, devaddr,
2051 1.1.1.10 mrg sizeof (void *));
2052 1.1.1.10 mrg }
2053 1.1.1.10 mrg addr += sizeof (void *);
2054 1.1.1.10 mrg }
2055 1.1.1.10 mrg }
2056 1.1.1.10 mrg else
2057 1.1.1.10 mrg {
2058 1.1.1.10 mrg void *hostaddr = (void *) cur_node.host_start;
2059 1.1.1.10 mrg void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2060 1.1.1.10 mrg + cur_node.host_start
2061 1.1.1.10 mrg - n->host_start);
2062 1.1.1.10 mrg size_t size = cur_node.host_end - cur_node.host_start;
2063 1.1.1.3 mrg
2064 1.1.1.10 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask))
2065 1.1.1.10 mrg gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2066 1.1.1.10 mrg false, NULL);
2067 1.1.1.10 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2068 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2069 1.1.1.10 mrg }
2070 1.1 mrg }
2071 1.1 mrg }
2072 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2073 1.1 mrg }
2074 1.1 mrg
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compiler
   emitting variable and functions in the same order.  The caller must hold
   DEVICEP's lock, and register_lock as well when IS_REGISTER_LOCK is set
   (both are released here on fatal errors).  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  /* HOST_TABLE layout: [0] func table start, [1] func table end,
     [2] var table start, [3] var table end.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Currently the only "other" entry is 'device_num'.  */
  int num_others = 1;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars
      /* Others (device_num) are included as trailing entries in pair list.  */
      && num_target_entries != num_funcs + num_vars + num_others)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  The descriptor is
     never freed (REFCOUNT_INFINITY) and owns no device memory of its own.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  /* Functions: each host address maps to a 1-byte key whose tgt_offset is
     the device-side entry point.  */
  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      /* Host and device sizes must agree for ordinary (non-link) vars.  */
      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Last entry is for the on-device 'device_num' variable.  Tolerate case
     where plugin does not return this entry.  */
  if (num_funcs + num_vars < num_target_entries)
    {
      struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
      /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
	 was found in this image.  */
      if (device_num_var->start != 0)
	{
	  /* The index of the devicep within devices[] is regarded as its
	     'device number', which is different from the per-device type
	     devicep->target_id.  */
	  int device_num_val = (int) (devicep - &devices[0]);
	  if (device_num_var->end - device_num_var->start != sizeof (int))
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      if (is_register_lock)
		gomp_mutex_unlock (&register_lock);
	      gomp_fatal ("offload plugin managed 'device_num' not of expected "
			  "format");
	    }

	  /* Copy device_num value to place on device memory, hereby actually
	     designating its device number into effect.  */
	  gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
			      &device_num_val, sizeof (int), false, NULL);
	}
    }

  free (target_table);
}
2211 1.1 mrg
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.   */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  /* HOST_TABLE is four pointers: [func_start, func_end, var_start, var_end].  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  Remember it now, before the
     entries are removed below, so the shared target_mem_desc can be freed
     at the end if gomp_remove_var did not already release it.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  /* Ask the plugin to drop the image; unlock before aborting so the device
     lock is not left held on the fatal path.  */
  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  Functions are keyed by a 1-byte
     range starting at the host address.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      /* Ordinary variables are simply removed; "declare target link"
	 variables go through gomp_remove_var, which may also unmap the
	 whole target_mem_desc.  */
      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  /* Free the image's shared descriptor unless gomp_remove_var above
     already unmapped it.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
2284 1.1.1.2 mrg
2285 1.1 mrg /* This function should be called from every offload image while loading.
2286 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2287 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
2288 1.1 mrg
2289 1.1 mrg void
2290 1.1.1.2 mrg GOMP_offload_register_ver (unsigned version, const void *host_table,
2291 1.1.1.2 mrg int target_type, const void *target_data)
2292 1.1 mrg {
2293 1.1 mrg int i;
2294 1.1.1.2 mrg
2295 1.1.1.2 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2296 1.1.1.2 mrg gomp_fatal ("Library too old for offload (version %u < %u)",
2297 1.1.1.2 mrg GOMP_VERSION, GOMP_VERSION_LIB (version));
2298 1.1.1.2 mrg
2299 1.1 mrg gomp_mutex_lock (®ister_lock);
2300 1.1 mrg
2301 1.1 mrg /* Load image to all initialized devices. */
2302 1.1 mrg for (i = 0; i < num_devices; i++)
2303 1.1 mrg {
2304 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
2305 1.1 mrg gomp_mutex_lock (&devicep->lock);
2306 1.1.1.2 mrg if (devicep->type == target_type
2307 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
2308 1.1.1.2 mrg gomp_load_image_to_device (devicep, version,
2309 1.1.1.2 mrg host_table, target_data, true);
2310 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2311 1.1 mrg }
2312 1.1 mrg
2313 1.1 mrg /* Insert image to array of pending images. */
2314 1.1 mrg offload_images
2315 1.1 mrg = gomp_realloc_unlock (offload_images,
2316 1.1 mrg (num_offload_images + 1)
2317 1.1 mrg * sizeof (struct offload_image_descr));
2318 1.1.1.2 mrg offload_images[num_offload_images].version = version;
2319 1.1 mrg offload_images[num_offload_images].type = target_type;
2320 1.1 mrg offload_images[num_offload_images].host_table = host_table;
2321 1.1 mrg offload_images[num_offload_images].target_data = target_data;
2322 1.1 mrg
2323 1.1 mrg num_offload_images++;
2324 1.1 mrg gomp_mutex_unlock (®ister_lock);
2325 1.1 mrg }
2326 1.1 mrg
/* Pre-versioning entry point kept for ABI compatibility; forwards to the
   versioned registration routine with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2333 1.1.1.2 mrg
2334 1.1 mrg /* This function should be called from every offload image while unloading.
2335 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2336 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
2337 1.1 mrg
2338 1.1 mrg void
2339 1.1.1.2 mrg GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2340 1.1.1.2 mrg int target_type, const void *target_data)
2341 1.1 mrg {
2342 1.1 mrg int i;
2343 1.1 mrg
2344 1.1 mrg gomp_mutex_lock (®ister_lock);
2345 1.1 mrg
2346 1.1 mrg /* Unload image from all initialized devices. */
2347 1.1 mrg for (i = 0; i < num_devices; i++)
2348 1.1 mrg {
2349 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
2350 1.1 mrg gomp_mutex_lock (&devicep->lock);
2351 1.1.1.2 mrg if (devicep->type == target_type
2352 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
2353 1.1.1.2 mrg gomp_unload_image_from_device (devicep, version,
2354 1.1.1.2 mrg host_table, target_data);
2355 1.1 mrg gomp_mutex_unlock (&devicep->lock);
2356 1.1 mrg }
2357 1.1 mrg
2358 1.1 mrg /* Remove image from array of pending images. */
2359 1.1 mrg for (i = 0; i < num_offload_images; i++)
2360 1.1 mrg if (offload_images[i].target_data == target_data)
2361 1.1 mrg {
2362 1.1 mrg offload_images[i] = offload_images[--num_offload_images];
2363 1.1 mrg break;
2364 1.1 mrg }
2365 1.1 mrg
2366 1.1 mrg gomp_mutex_unlock (®ister_lock);
2367 1.1 mrg }
2368 1.1 mrg
/* Pre-versioning entry point kept for ABI compatibility; forwards to the
   versioned unregistration routine with version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2375 1.1.1.2 mrg
2376 1.1 mrg /* This function initializes the target device, specified by DEVICEP. DEVICEP
2377 1.1 mrg must be locked on entry, and remains locked on return. */
2378 1.1 mrg
2379 1.1 mrg attribute_hidden void
2380 1.1 mrg gomp_init_device (struct gomp_device_descr *devicep)
2381 1.1 mrg {
2382 1.1 mrg int i;
2383 1.1.1.3 mrg if (!devicep->init_device_func (devicep->target_id))
2384 1.1.1.3 mrg {
2385 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
2386 1.1.1.3 mrg gomp_fatal ("device initialization failed");
2387 1.1.1.3 mrg }
2388 1.1 mrg
2389 1.1 mrg /* Load to device all images registered by the moment. */
2390 1.1 mrg for (i = 0; i < num_offload_images; i++)
2391 1.1 mrg {
2392 1.1 mrg struct offload_image_descr *image = &offload_images[i];
2393 1.1 mrg if (image->type == devicep->type)
2394 1.1.1.2 mrg gomp_load_image_to_device (devicep, image->version,
2395 1.1.1.2 mrg image->host_table, image->target_data,
2396 1.1.1.2 mrg false);
2397 1.1 mrg }
2398 1.1 mrg
2399 1.1.1.8 mrg /* Initialize OpenACC asynchronous queues. */
2400 1.1.1.8 mrg goacc_init_asyncqueues (devicep);
2401 1.1.1.8 mrg
2402 1.1.1.2 mrg devicep->state = GOMP_DEVICE_INITIALIZED;
2403 1.1.1.2 mrg }
2404 1.1.1.2 mrg
2405 1.1.1.8 mrg /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2406 1.1.1.8 mrg must be locked on entry, and remains locked on return. */
2407 1.1.1.8 mrg
2408 1.1.1.8 mrg attribute_hidden bool
2409 1.1.1.8 mrg gomp_fini_device (struct gomp_device_descr *devicep)
2410 1.1.1.8 mrg {
2411 1.1.1.8 mrg bool ret = goacc_fini_asyncqueues (devicep);
2412 1.1.1.8 mrg ret &= devicep->fini_device_func (devicep->target_id);
2413 1.1.1.8 mrg devicep->state = GOMP_DEVICE_FINALIZED;
2414 1.1.1.8 mrg return ret;
2415 1.1.1.8 mrg }
2416 1.1.1.8 mrg
2417 1.1.1.2 mrg attribute_hidden void
2418 1.1.1.2 mrg gomp_unload_device (struct gomp_device_descr *devicep)
2419 1.1.1.2 mrg {
2420 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
2421 1.1.1.2 mrg {
2422 1.1.1.2 mrg unsigned i;
2423 1.1.1.2 mrg
2424 1.1.1.2 mrg /* Unload from device all images registered at the moment. */
2425 1.1.1.2 mrg for (i = 0; i < num_offload_images; i++)
2426 1.1.1.2 mrg {
2427 1.1.1.2 mrg struct offload_image_descr *image = &offload_images[i];
2428 1.1.1.2 mrg if (image->type == devicep->type)
2429 1.1.1.2 mrg gomp_unload_image_from_device (devicep, image->version,
2430 1.1.1.2 mrg image->host_table,
2431 1.1.1.2 mrg image->target_data);
2432 1.1.1.2 mrg }
2433 1.1.1.2 mrg }
2434 1.1 mrg }
2435 1.1 mrg
/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  /* DEVICEP is non-NULL when a real device was requested but could not run
     the region; with mandatory offloading that is a hard error rather than
     a silent fallback.  */
  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  /* Run FN in a pristine thread state, keeping only place information;
     the saved state is restored after the call.  */
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  /* Scan the NULL-terminated ARGS array for a device-independent
     thread_limit argument and apply it to the host ICV.  Entries either
     embed their value in the id word or carry it in the following slot
     (GOMP_TARGET_ARG_SUBSEQUENT_PARAM).  */
  if (args)
    while (*args)
      {
	intptr_t id = (intptr_t) *args++, val;
	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	  val = (intptr_t) *args++;
	else
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
	  continue;
	id &= GOMP_TARGET_ARG_ID_MASK;
	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
	  continue;
	/* Clamp to int range; 0 means "leave the ICV alone".  */
	val = val > INT_MAX ? INT_MAX : val;
	if (val)
	  gomp_icv (true)->thread_limit_var = val;
	break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
2479 1.1 mrg
2480 1.1.1.2 mrg /* Calculate alignment and size requirements of a private copy of data shared
2481 1.1.1.2 mrg as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2482 1.1.1.2 mrg
2483 1.1.1.2 mrg static inline void
2484 1.1.1.2 mrg calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2485 1.1.1.2 mrg unsigned short *kinds, size_t *tgt_align,
2486 1.1.1.2 mrg size_t *tgt_size)
2487 1.1.1.2 mrg {
2488 1.1.1.2 mrg size_t i;
2489 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
2490 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2491 1.1.1.2 mrg {
2492 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
2493 1.1.1.2 mrg if (*tgt_align < align)
2494 1.1.1.2 mrg *tgt_align = align;
2495 1.1.1.2 mrg *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2496 1.1.1.2 mrg *tgt_size += sizes[i];
2497 1.1.1.2 mrg }
2498 1.1.1.2 mrg }
2499 1.1.1.2 mrg
2500 1.1.1.2 mrg /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2501 1.1.1.2 mrg
2502 1.1.1.2 mrg static inline void
2503 1.1.1.2 mrg copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2504 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, size_t tgt_align,
2505 1.1.1.2 mrg size_t tgt_size)
2506 1.1.1.2 mrg {
2507 1.1.1.2 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2508 1.1.1.2 mrg if (al)
2509 1.1.1.2 mrg tgt += tgt_align - al;
2510 1.1.1.2 mrg tgt_size = 0;
2511 1.1.1.2 mrg size_t i;
2512 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
2513 1.1.1.10 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2514 1.1.1.2 mrg {
2515 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
2516 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
2517 1.1.1.2 mrg memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2518 1.1.1.2 mrg hostaddrs[i] = tgt + tgt_size;
2519 1.1.1.2 mrg tgt_size = tgt_size + sizes[i];
2520 1.1.1.2 mrg }
2521 1.1.1.2 mrg }
2522 1.1.1.2 mrg
2523 1.1.1.2 mrg /* Helper function of GOMP_target{,_ext} routines. */
2524 1.1.1.2 mrg
2525 1.1.1.2 mrg static void *
2526 1.1.1.2 mrg gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2527 1.1.1.2 mrg void (*host_fn) (void *))
2528 1.1.1.2 mrg {
2529 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2530 1.1.1.2 mrg return (void *) host_fn;
2531 1.1.1.2 mrg else
2532 1.1.1.2 mrg {
2533 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2534 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
2535 1.1.1.2 mrg {
2536 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2537 1.1.1.2 mrg return NULL;
2538 1.1.1.2 mrg }
2539 1.1.1.2 mrg
2540 1.1.1.2 mrg struct splay_tree_key_s k;
2541 1.1.1.2 mrg k.host_start = (uintptr_t) host_fn;
2542 1.1.1.2 mrg k.host_end = k.host_start + 1;
2543 1.1.1.2 mrg splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2544 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2545 1.1.1.2 mrg if (tgt_fn == NULL)
2546 1.1.1.2 mrg return NULL;
2547 1.1.1.2 mrg
2548 1.1.1.2 mrg return (void *) tgt_fn->tgt_offset;
2549 1.1.1.2 mrg }
2550 1.1.1.2 mrg }
2551 1.1.1.2 mrg
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  /* Run on the host when no usable device or device-side function address
     is available.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);

  /* REFCOUNT_SET tracks this construct's refcount increments so unmapping
     can undo exactly what the mapping did; it is cleared between the map
     and unmap phases and freed at the end.  */
  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}
2587 1.1.1.2 mrg
2588 1.1.1.8 mrg static inline unsigned int
2589 1.1.1.8 mrg clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2590 1.1.1.8 mrg {
2591 1.1.1.8 mrg /* If we cannot run asynchronously, simply ignore nowait. */
2592 1.1.1.8 mrg if (devicep != NULL && devicep->async_run_func == NULL)
2593 1.1.1.8 mrg flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2594 1.1.1.8 mrg
2595 1.1.1.8 mrg return flags;
2596 1.1.1.8 mrg }
2597 1.1.1.8 mrg
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one two
   elements where the first specifies for which device it is intended, the type
   and optionally also the value.  If the value is not present in the first
   one, the whole second element the actual value.  The last element of the
   array is a single NULL.  Among the device independent can be for example
   NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once firstprivate data has been copied, so it is not done twice.  */
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  /* Walk up to the nearest implicit task (if any) so its storage
	     can be reclaimed after the new team's implicit task takes
	     over.  */
	  struct gomp_task **implicit_task = &task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  while (*implicit_task
		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
	    implicit_task = &(*implicit_task)->parent;
	  if (*implicit_task)
	    {
	      /* End and free the old implicit task, then restore the new
		 team's implicit task as the current one.  */
	      thr->task = *implicit_task;
	      gomp_end_task ();
	      free (*implicit_task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	  if (implicit_task != &task)
	    {
	      /* Splice the new implicit task into the parent chain where
		 the old one was removed.  */
	      *implicit_task = thr->task;
	      thr->task = task;
	    }
	}
      /* With a team available (and not inside a final task), run the
	 target region as an asynchronous target task.  */
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  /* Fall back to host execution when the device is unusable or the
     function cannot run there.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep, args);
      return;
    }

  struct target_mem_desc *tgt_vars;
  /* Tracks this construct's refcount increments for non-shared-memory
     devices; stays NULL on the shared-memory path.  */
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory devices need no mapping, only private copies of
	 firstprivate data.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
				true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);
}
2762 1.1 mrg
2763 1.1.1.2 mrg /* Host fallback for GOMP_target_data{,_ext} routines. */
2764 1.1.1.2 mrg
2765 1.1.1.2 mrg static void
2766 1.1.1.10 mrg gomp_target_data_fallback (struct gomp_device_descr *devicep)
2767 1.1.1.2 mrg {
2768 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (false);
2769 1.1.1.10 mrg
2770 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2771 1.1.1.10 mrg && devicep != NULL)
2772 1.1.1.10 mrg gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2773 1.1.1.10 mrg "be used for offloading");
2774 1.1.1.10 mrg
2775 1.1.1.2 mrg if (icv->target_data)
2776 1.1 mrg {
2777 1.1.1.2 mrg /* Even when doing a host fallback, if there are any active
2778 1.1.1.2 mrg #pragma omp target data constructs, need to remember the
2779 1.1.1.2 mrg new #pragma omp target data, otherwise GOMP_target_end_data
2780 1.1.1.2 mrg would get out of sync. */
2781 1.1.1.2 mrg struct target_mem_desc *tgt
2782 1.1.1.2 mrg = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2783 1.1.1.10 mrg NULL, GOMP_MAP_VARS_DATA);
2784 1.1.1.2 mrg tgt->prev = icv->target_data;
2785 1.1.1.2 mrg icv->target_data = tgt;
2786 1.1 mrg }
2787 1.1 mrg }
2788 1.1 mrg
2789 1.1 mrg void
2790 1.1 mrg GOMP_target_data (int device, const void *unused, size_t mapnum,
2791 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
2792 1.1 mrg {
2793 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
2794 1.1 mrg
2795 1.1 mrg if (devicep == NULL
2796 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2797 1.1.1.2 mrg || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2798 1.1.1.10 mrg return gomp_target_data_fallback (devicep);
2799 1.1 mrg
2800 1.1 mrg struct target_mem_desc *tgt
2801 1.1 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2802 1.1.1.10 mrg NULL, GOMP_MAP_VARS_DATA);
2803 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (true);
2804 1.1.1.2 mrg tgt->prev = icv->target_data;
2805 1.1.1.2 mrg icv->target_data = tgt;
2806 1.1.1.2 mrg }
2807 1.1.1.2 mrg
2808 1.1.1.2 mrg void
2809 1.1.1.2 mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2810 1.1.1.2 mrg size_t *sizes, unsigned short *kinds)
2811 1.1.1.2 mrg {
2812 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
2813 1.1.1.2 mrg
2814 1.1.1.2 mrg if (devicep == NULL
2815 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2816 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2817 1.1.1.10 mrg return gomp_target_data_fallback (devicep);
2818 1.1.1.2 mrg
2819 1.1.1.2 mrg struct target_mem_desc *tgt
2820 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2821 1.1.1.10 mrg NULL, GOMP_MAP_VARS_DATA);
2822 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
2823 1.1 mrg tgt->prev = icv->target_data;
2824 1.1 mrg icv->target_data = tgt;
2825 1.1 mrg }
2826 1.1 mrg
2827 1.1 mrg void
2828 1.1 mrg GOMP_target_end_data (void)
2829 1.1 mrg {
2830 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
2831 1.1 mrg if (icv->target_data)
2832 1.1 mrg {
2833 1.1 mrg struct target_mem_desc *tgt = icv->target_data;
2834 1.1 mrg icv->target_data = tgt->prev;
2835 1.1.1.10 mrg gomp_unmap_vars (tgt, true, NULL);
2836 1.1 mrg }
2837 1.1 mrg }
2838 1.1 mrg
2839 1.1 mrg void
2840 1.1 mrg GOMP_target_update (int device, const void *unused, size_t mapnum,
2841 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
2842 1.1 mrg {
2843 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
2844 1.1 mrg
2845 1.1 mrg if (devicep == NULL
2846 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2847 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2848 1.1.1.2 mrg return;
2849 1.1.1.2 mrg
2850 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2851 1.1.1.2 mrg }
2852 1.1.1.2 mrg
/* Like GOMP_target_update, but with 16-bit KINDS, no UNUSED parameter,
   and additional FLAGS and DEPEND arguments.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* With nowait inside a team (and not in a final task), try to
	     defer the update as an asynchronous target task.  */
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      /* Synchronous path: wait for the dependencies here.  */
	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to transfer for the host or a shared-memory device.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.
     Re-checked here because the wait above may have been skipped or the
     state may have changed while waiting.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
2931 1.1.1.2 mrg
2932 1.1.1.2 mrg static void
2933 1.1.1.2 mrg gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2934 1.1.1.10 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
2935 1.1.1.10 mrg htab_t *refcount_set)
2936 1.1.1.2 mrg {
2937 1.1.1.2 mrg const int typemask = 0xff;
2938 1.1.1.2 mrg size_t i;
2939 1.1 mrg gomp_mutex_lock (&devicep->lock);
2940 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
2941 1.1.1.2 mrg {
2942 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2943 1.1.1.2 mrg return;
2944 1.1.1.2 mrg }
2945 1.1.1.2 mrg
2946 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
2947 1.1.1.10 mrg if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2948 1.1.1.10 mrg {
2949 1.1.1.10 mrg struct splay_tree_key_s cur_node;
2950 1.1.1.10 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
2951 1.1.1.10 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
2952 1.1.1.10 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2953 1.1.1.10 mrg
2954 1.1.1.10 mrg if (n)
2955 1.1.1.10 mrg gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2956 1.1.1.10 mrg false, NULL);
2957 1.1.1.10 mrg }
2958 1.1.1.10 mrg
2959 1.1.1.10 mrg int nrmvars = 0;
2960 1.1.1.10 mrg splay_tree_key remove_vars[mapnum];
2961 1.1.1.10 mrg
2962 1.1.1.10 mrg for (i = 0; i < mapnum; i++)
2963 1.1.1.2 mrg {
2964 1.1.1.2 mrg struct splay_tree_key_s cur_node;
2965 1.1.1.2 mrg unsigned char kind = kinds[i] & typemask;
2966 1.1.1.2 mrg switch (kind)
2967 1.1.1.2 mrg {
2968 1.1.1.2 mrg case GOMP_MAP_FROM:
2969 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
2970 1.1.1.2 mrg case GOMP_MAP_DELETE:
2971 1.1.1.2 mrg case GOMP_MAP_RELEASE:
2972 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2973 1.1.1.2 mrg case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2974 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
2975 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizes[i];
2976 1.1.1.2 mrg splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2977 1.1.1.2 mrg || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2978 1.1.1.2 mrg ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2979 1.1.1.2 mrg : splay_tree_lookup (&devicep->mem_map, &cur_node);
2980 1.1.1.2 mrg if (!k)
2981 1.1.1.2 mrg continue;
2982 1.1.1.2 mrg
2983 1.1.1.10 mrg bool delete_p = (kind == GOMP_MAP_DELETE
2984 1.1.1.10 mrg || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2985 1.1.1.10 mrg bool do_copy, do_remove;
2986 1.1.1.10 mrg gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2987 1.1.1.10 mrg &do_remove);
2988 1.1.1.2 mrg
2989 1.1.1.10 mrg if ((kind == GOMP_MAP_FROM && do_copy)
2990 1.1.1.2 mrg || kind == GOMP_MAP_ALWAYS_FROM)
2991 1.1.1.10 mrg {
2992 1.1.1.10 mrg if (k->aux && k->aux->attach_count)
2993 1.1.1.10 mrg {
2994 1.1.1.10 mrg /* We have to be careful not to overwrite still attached
2995 1.1.1.10 mrg pointers during the copyback to host. */
2996 1.1.1.10 mrg uintptr_t addr = k->host_start;
2997 1.1.1.10 mrg while (addr < k->host_end)
2998 1.1.1.10 mrg {
2999 1.1.1.10 mrg size_t i = (addr - k->host_start) / sizeof (void *);
3000 1.1.1.10 mrg if (k->aux->attach_count[i] == 0)
3001 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, (void *) addr,
3002 1.1.1.10 mrg (void *) (k->tgt->tgt_start
3003 1.1.1.10 mrg + k->tgt_offset
3004 1.1.1.10 mrg + addr - k->host_start),
3005 1.1.1.10 mrg sizeof (void *));
3006 1.1.1.10 mrg addr += sizeof (void *);
3007 1.1.1.10 mrg }
3008 1.1.1.10 mrg }
3009 1.1.1.10 mrg else
3010 1.1.1.10 mrg gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3011 1.1.1.10 mrg (void *) (k->tgt->tgt_start + k->tgt_offset
3012 1.1.1.10 mrg + cur_node.host_start
3013 1.1.1.10 mrg - k->host_start),
3014 1.1.1.10 mrg cur_node.host_end - cur_node.host_start);
3015 1.1.1.10 mrg }
3016 1.1.1.10 mrg
3017 1.1.1.10 mrg /* Structure elements lists are removed altogether at once, which
3018 1.1.1.10 mrg may cause immediate deallocation of the target_mem_desc, causing
3019 1.1.1.10 mrg errors if we still have following element siblings to copy back.
3020 1.1.1.10 mrg While we're at it, it also seems more disciplined to simply
3021 1.1.1.10 mrg queue all removals together for processing below.
3022 1.1.1.10 mrg
3023 1.1.1.10 mrg Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3024 1.1.1.10 mrg not have this problem, since they maintain an additional
3025 1.1.1.10 mrg tgt->refcount = 1 reference to the target_mem_desc to start with.
3026 1.1.1.10 mrg */
3027 1.1.1.10 mrg if (do_remove)
3028 1.1.1.10 mrg remove_vars[nrmvars++] = k;
3029 1.1.1.10 mrg break;
3030 1.1.1.2 mrg
3031 1.1.1.10 mrg case GOMP_MAP_DETACH:
3032 1.1.1.2 mrg break;
3033 1.1.1.2 mrg default:
3034 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3035 1.1.1.2 mrg gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3036 1.1.1.2 mrg kind);
3037 1.1.1.2 mrg }
3038 1.1.1.2 mrg }
3039 1.1.1.2 mrg
3040 1.1.1.10 mrg for (int i = 0; i < nrmvars; i++)
3041 1.1.1.10 mrg gomp_remove_var (devicep, remove_vars[i]);
3042 1.1.1.10 mrg
3043 1.1 mrg gomp_mutex_unlock (&devicep->lock);
3044 1.1.1.2 mrg }
3045 1.1 mrg
3046 1.1.1.2 mrg void
3047 1.1.1.2 mrg GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3048 1.1.1.2 mrg size_t *sizes, unsigned short *kinds,
3049 1.1.1.2 mrg unsigned int flags, void **depend)
3050 1.1.1.2 mrg {
3051 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
3052 1.1.1.2 mrg
3053 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present,
3054 1.1.1.2 mrg block the parent task until the dependencies are resolved
3055 1.1.1.2 mrg and then just continue with the rest of the function as if it
3056 1.1.1.2 mrg is a merged task. Until we are able to schedule task during
3057 1.1.1.2 mrg variable mapping or unmapping, ignore nowait if depend clauses
3058 1.1.1.2 mrg are not present. */
3059 1.1.1.2 mrg if (depend != NULL)
3060 1.1.1.2 mrg {
3061 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
3062 1.1.1.2 mrg if (thr->task && thr->task->depend_hash)
3063 1.1.1.2 mrg {
3064 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3065 1.1.1.2 mrg && thr->ts.team
3066 1.1.1.2 mrg && !thr->task->final_task)
3067 1.1.1.2 mrg {
3068 1.1.1.2 mrg if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3069 1.1.1.2 mrg mapnum, hostaddrs, sizes, kinds,
3070 1.1.1.2 mrg flags, depend, NULL,
3071 1.1.1.2 mrg GOMP_TARGET_TASK_DATA))
3072 1.1.1.2 mrg return;
3073 1.1.1.2 mrg }
3074 1.1.1.2 mrg else
3075 1.1.1.2 mrg {
3076 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
3077 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new
3078 1.1.1.2 mrg tasks. */
3079 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
3080 1.1.1.7 mrg {
3081 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
3082 1.1.1.7 mrg return;
3083 1.1.1.7 mrg if (thr->task->taskgroup)
3084 1.1.1.7 mrg {
3085 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
3086 1.1.1.7 mrg return;
3087 1.1.1.7 mrg if (thr->task->taskgroup->workshare
3088 1.1.1.7 mrg && thr->task->taskgroup->prev
3089 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
3090 1.1.1.7 mrg return;
3091 1.1.1.7 mrg }
3092 1.1.1.7 mrg }
3093 1.1.1.2 mrg
3094 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend);
3095 1.1.1.2 mrg }
3096 1.1.1.2 mrg }
3097 1.1.1.2 mrg }
3098 1.1.1.2 mrg
3099 1.1.1.2 mrg if (devicep == NULL
3100 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3101 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3102 1.1.1.2 mrg return;
3103 1.1.1.2 mrg
3104 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
3105 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
3106 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3107 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
3108 1.1.1.7 mrg {
3109 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
3110 1.1.1.7 mrg return;
3111 1.1.1.7 mrg if (thr->task->taskgroup)
3112 1.1.1.7 mrg {
3113 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
3114 1.1.1.7 mrg return;
3115 1.1.1.7 mrg if (thr->task->taskgroup->workshare
3116 1.1.1.7 mrg && thr->task->taskgroup->prev
3117 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
3118 1.1.1.7 mrg return;
3119 1.1.1.7 mrg }
3120 1.1.1.7 mrg }
3121 1.1.1.2 mrg
3122 1.1.1.10 mrg htab_t refcount_set = htab_create (mapnum);
3123 1.1.1.10 mrg
3124 1.1.1.8 mrg /* The variables are mapped separately such that they can be released
3125 1.1.1.8 mrg independently. */
3126 1.1.1.8 mrg size_t i, j;
3127 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3128 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
3129 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3130 1.1.1.2 mrg {
3131 1.1.1.2 mrg gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3132 1.1.1.10 mrg &kinds[i], true, &refcount_set,
3133 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA);
3134 1.1.1.2 mrg i += sizes[i];
3135 1.1.1.2 mrg }
3136 1.1.1.8 mrg else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3137 1.1.1.8 mrg {
3138 1.1.1.8 mrg for (j = i + 1; j < mapnum; j++)
3139 1.1.1.10 mrg if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3140 1.1.1.10 mrg && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3141 1.1.1.8 mrg break;
3142 1.1.1.8 mrg gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
3143 1.1.1.10 mrg &kinds[i], true, &refcount_set,
3144 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA);
3145 1.1.1.8 mrg i += j - i - 1;
3146 1.1.1.8 mrg }
3147 1.1.1.10 mrg else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3148 1.1.1.10 mrg {
3149 1.1.1.10 mrg /* An attach operation must be processed together with the mapped
3150 1.1.1.10 mrg base-pointer list item. */
3151 1.1.1.10 mrg gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3152 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3153 1.1.1.10 mrg i += 1;
3154 1.1.1.10 mrg }
3155 1.1.1.2 mrg else
3156 1.1.1.2 mrg gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3157 1.1.1.10 mrg true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3158 1.1.1.2 mrg else
3159 1.1.1.10 mrg gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3160 1.1.1.10 mrg htab_free (refcount_set);
3161 1.1.1.2 mrg }
3162 1.1.1.2 mrg
3163 1.1.1.2 mrg bool
3164 1.1.1.2 mrg gomp_target_task_fn (void *data)
3165 1.1.1.2 mrg {
3166 1.1.1.2 mrg struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3167 1.1.1.2 mrg struct gomp_device_descr *devicep = ttask->devicep;
3168 1.1.1.2 mrg
3169 1.1.1.2 mrg if (ttask->fn != NULL)
3170 1.1.1.2 mrg {
3171 1.1.1.2 mrg void *fn_addr;
3172 1.1.1.2 mrg if (devicep == NULL
3173 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3174 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3175 1.1.1.2 mrg || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3176 1.1.1.2 mrg {
3177 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_FALLBACK;
3178 1.1.1.10 mrg gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3179 1.1.1.10 mrg ttask->args);
3180 1.1.1.2 mrg return false;
3181 1.1.1.2 mrg }
3182 1.1.1.2 mrg
3183 1.1.1.2 mrg if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3184 1.1.1.2 mrg {
3185 1.1.1.2 mrg if (ttask->tgt)
3186 1.1.1.10 mrg gomp_unmap_vars (ttask->tgt, true, NULL);
3187 1.1.1.2 mrg return false;
3188 1.1.1.2 mrg }
3189 1.1.1.2 mrg
3190 1.1.1.2 mrg void *actual_arguments;
3191 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3192 1.1.1.2 mrg {
3193 1.1.1.2 mrg ttask->tgt = NULL;
3194 1.1.1.2 mrg actual_arguments = ttask->hostaddrs;
3195 1.1.1.2 mrg }
3196 1.1.1.2 mrg else
3197 1.1.1.2 mrg {
3198 1.1.1.2 mrg ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3199 1.1.1.2 mrg NULL, ttask->sizes, ttask->kinds, true,
3200 1.1.1.10 mrg NULL, GOMP_MAP_VARS_TARGET);
3201 1.1.1.2 mrg actual_arguments = (void *) ttask->tgt->tgt_start;
3202 1.1.1.2 mrg }
3203 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3204 1.1.1.2 mrg
3205 1.1.1.8 mrg assert (devicep->async_run_func);
3206 1.1.1.2 mrg devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3207 1.1.1.2 mrg ttask->args, (void *) ttask);
3208 1.1.1.2 mrg return true;
3209 1.1.1.2 mrg }
3210 1.1.1.2 mrg else if (devicep == NULL
3211 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3212 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3213 1.1.1.2 mrg return false;
3214 1.1.1.2 mrg
3215 1.1.1.2 mrg size_t i;
3216 1.1.1.2 mrg if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3217 1.1.1.2 mrg gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3218 1.1.1.2 mrg ttask->kinds, true);
3219 1.1.1.2 mrg else
3220 1.1.1.10 mrg {
3221 1.1.1.10 mrg htab_t refcount_set = htab_create (ttask->mapnum);
3222 1.1.1.10 mrg if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3223 1.1.1.10 mrg for (i = 0; i < ttask->mapnum; i++)
3224 1.1.1.10 mrg if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3225 1.1.1.10 mrg {
3226 1.1.1.10 mrg gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3227 1.1.1.10 mrg NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3228 1.1.1.10 mrg &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3229 1.1.1.10 mrg i += ttask->sizes[i];
3230 1.1.1.10 mrg }
3231 1.1.1.10 mrg else
3232 1.1.1.10 mrg gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3233 1.1.1.10 mrg &ttask->kinds[i], true, &refcount_set,
3234 1.1.1.10 mrg GOMP_MAP_VARS_ENTER_DATA);
3235 1.1.1.10 mrg else
3236 1.1.1.10 mrg gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3237 1.1.1.10 mrg ttask->kinds, &refcount_set);
3238 1.1.1.10 mrg htab_free (refcount_set);
3239 1.1.1.10 mrg }
3240 1.1.1.2 mrg return false;
3241 1.1 mrg }
3242 1.1 mrg
3243 1.1 mrg void
3244 1.1 mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3245 1.1 mrg {
3246 1.1 mrg if (thread_limit)
3247 1.1 mrg {
3248 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
3249 1.1 mrg icv->thread_limit_var
3250 1.1 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3251 1.1 mrg }
3252 1.1 mrg (void) num_teams;
3253 1.1 mrg }
3254 1.1 mrg
3255 1.1.1.10 mrg bool
3256 1.1.1.10 mrg GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3257 1.1.1.10 mrg unsigned int thread_limit, bool first)
3258 1.1.1.10 mrg {
3259 1.1.1.10 mrg struct gomp_thread *thr = gomp_thread ();
3260 1.1.1.10 mrg if (first)
3261 1.1.1.10 mrg {
3262 1.1.1.10 mrg if (thread_limit)
3263 1.1.1.10 mrg {
3264 1.1.1.10 mrg struct gomp_task_icv *icv = gomp_icv (true);
3265 1.1.1.10 mrg icv->thread_limit_var
3266 1.1.1.10 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3267 1.1.1.10 mrg }
3268 1.1.1.10 mrg (void) num_teams_high;
3269 1.1.1.10 mrg if (num_teams_low == 0)
3270 1.1.1.10 mrg num_teams_low = 1;
3271 1.1.1.10 mrg thr->num_teams = num_teams_low - 1;
3272 1.1.1.10 mrg thr->team_num = 0;
3273 1.1.1.10 mrg }
3274 1.1.1.10 mrg else if (thr->team_num == thr->num_teams)
3275 1.1.1.10 mrg return false;
3276 1.1.1.10 mrg else
3277 1.1.1.10 mrg ++thr->team_num;
3278 1.1.1.10 mrg return true;
3279 1.1.1.10 mrg }
3280 1.1.1.10 mrg
3281 1.1.1.2 mrg void *
3282 1.1.1.2 mrg omp_target_alloc (size_t size, int device_num)
3283 1.1.1.2 mrg {
3284 1.1.1.10 mrg if (device_num == gomp_get_num_devices ())
3285 1.1.1.2 mrg return malloc (size);
3286 1.1.1.2 mrg
3287 1.1.1.2 mrg if (device_num < 0)
3288 1.1.1.2 mrg return NULL;
3289 1.1.1.2 mrg
3290 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
3291 1.1.1.2 mrg if (devicep == NULL)
3292 1.1.1.2 mrg return NULL;
3293 1.1.1.2 mrg
3294 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3295 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3296 1.1.1.2 mrg return malloc (size);
3297 1.1.1.2 mrg
3298 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
3299 1.1.1.2 mrg void *ret = devicep->alloc_func (devicep->target_id, size);
3300 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3301 1.1.1.2 mrg return ret;
3302 1.1.1.2 mrg }
3303 1.1.1.2 mrg
3304 1.1.1.2 mrg void
3305 1.1.1.2 mrg omp_target_free (void *device_ptr, int device_num)
3306 1.1.1.2 mrg {
3307 1.1.1.2 mrg if (device_ptr == NULL)
3308 1.1.1.2 mrg return;
3309 1.1.1.2 mrg
3310 1.1.1.10 mrg if (device_num == gomp_get_num_devices ())
3311 1.1.1.2 mrg {
3312 1.1.1.2 mrg free (device_ptr);
3313 1.1.1.2 mrg return;
3314 1.1.1.2 mrg }
3315 1.1.1.2 mrg
3316 1.1.1.2 mrg if (device_num < 0)
3317 1.1.1.2 mrg return;
3318 1.1.1.2 mrg
3319 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
3320 1.1.1.2 mrg if (devicep == NULL)
3321 1.1.1.2 mrg return;
3322 1.1.1.2 mrg
3323 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3324 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3325 1.1.1.2 mrg {
3326 1.1.1.2 mrg free (device_ptr);
3327 1.1.1.2 mrg return;
3328 1.1.1.2 mrg }
3329 1.1.1.2 mrg
3330 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
3331 1.1.1.3 mrg gomp_free_device_memory (devicep, device_ptr);
3332 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3333 1.1.1.2 mrg }
3334 1.1.1.2 mrg
3335 1.1.1.2 mrg int
3336 1.1.1.7 mrg omp_target_is_present (const void *ptr, int device_num)
3337 1.1.1.2 mrg {
3338 1.1.1.2 mrg if (ptr == NULL)
3339 1.1.1.2 mrg return 1;
3340 1.1.1.2 mrg
3341 1.1.1.10 mrg if (device_num == gomp_get_num_devices ())
3342 1.1.1.2 mrg return 1;
3343 1.1.1.2 mrg
3344 1.1.1.2 mrg if (device_num < 0)
3345 1.1.1.2 mrg return 0;
3346 1.1.1.2 mrg
3347 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
3348 1.1.1.2 mrg if (devicep == NULL)
3349 1.1.1.2 mrg return 0;
3350 1.1.1.2 mrg
3351 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3352 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3353 1.1.1.2 mrg return 1;
3354 1.1.1.2 mrg
3355 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
3356 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
3357 1.1.1.2 mrg struct splay_tree_key_s cur_node;
3358 1.1.1.2 mrg
3359 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
3360 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
3361 1.1.1.2 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3362 1.1.1.2 mrg int ret = n != NULL;
3363 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3364 1.1.1.2 mrg return ret;
3365 1.1.1.2 mrg }
3366 1.1.1.2 mrg
3367 1.1.1.2 mrg int
3368 1.1.1.7 mrg omp_target_memcpy (void *dst, const void *src, size_t length,
3369 1.1.1.7 mrg size_t dst_offset, size_t src_offset, int dst_device_num,
3370 1.1.1.7 mrg int src_device_num)
3371 1.1.1.2 mrg {
3372 1.1.1.2 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3373 1.1.1.3 mrg bool ret;
3374 1.1.1.2 mrg
3375 1.1.1.10 mrg if (dst_device_num != gomp_get_num_devices ())
3376 1.1.1.2 mrg {
3377 1.1.1.2 mrg if (dst_device_num < 0)
3378 1.1.1.2 mrg return EINVAL;
3379 1.1.1.2 mrg
3380 1.1.1.2 mrg dst_devicep = resolve_device (dst_device_num);
3381 1.1.1.2 mrg if (dst_devicep == NULL)
3382 1.1.1.2 mrg return EINVAL;
3383 1.1.1.2 mrg
3384 1.1.1.2 mrg if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3385 1.1.1.2 mrg || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3386 1.1.1.2 mrg dst_devicep = NULL;
3387 1.1.1.2 mrg }
3388 1.1.1.10 mrg if (src_device_num != num_devices_openmp)
3389 1.1.1.2 mrg {
3390 1.1.1.2 mrg if (src_device_num < 0)
3391 1.1.1.2 mrg return EINVAL;
3392 1.1.1.2 mrg
3393 1.1.1.2 mrg src_devicep = resolve_device (src_device_num);
3394 1.1.1.2 mrg if (src_devicep == NULL)
3395 1.1.1.2 mrg return EINVAL;
3396 1.1.1.2 mrg
3397 1.1.1.2 mrg if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3398 1.1.1.2 mrg || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3399 1.1.1.2 mrg src_devicep = NULL;
3400 1.1.1.2 mrg }
3401 1.1.1.2 mrg if (src_devicep == NULL && dst_devicep == NULL)
3402 1.1.1.2 mrg {
3403 1.1.1.2 mrg memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3404 1.1.1.2 mrg return 0;
3405 1.1.1.2 mrg }
3406 1.1.1.2 mrg if (src_devicep == NULL)
3407 1.1.1.2 mrg {
3408 1.1.1.2 mrg gomp_mutex_lock (&dst_devicep->lock);
3409 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3410 1.1.1.3 mrg (char *) dst + dst_offset,
3411 1.1.1.3 mrg (char *) src + src_offset, length);
3412 1.1.1.2 mrg gomp_mutex_unlock (&dst_devicep->lock);
3413 1.1.1.3 mrg return (ret ? 0 : EINVAL);
3414 1.1.1.2 mrg }
3415 1.1.1.2 mrg if (dst_devicep == NULL)
3416 1.1.1.2 mrg {
3417 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
3418 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id,
3419 1.1.1.3 mrg (char *) dst + dst_offset,
3420 1.1.1.3 mrg (char *) src + src_offset, length);
3421 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
3422 1.1.1.3 mrg return (ret ? 0 : EINVAL);
3423 1.1.1.2 mrg }
3424 1.1.1.2 mrg if (src_devicep == dst_devicep)
3425 1.1.1.2 mrg {
3426 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
3427 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id,
3428 1.1.1.3 mrg (char *) dst + dst_offset,
3429 1.1.1.3 mrg (char *) src + src_offset, length);
3430 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
3431 1.1.1.3 mrg return (ret ? 0 : EINVAL);
3432 1.1.1.2 mrg }
3433 1.1.1.2 mrg return EINVAL;
3434 1.1.1.2 mrg }
3435 1.1.1.2 mrg
3436 1.1.1.2 mrg static int
3437 1.1.1.7 mrg omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3438 1.1.1.2 mrg int num_dims, const size_t *volume,
3439 1.1.1.2 mrg const size_t *dst_offsets,
3440 1.1.1.2 mrg const size_t *src_offsets,
3441 1.1.1.2 mrg const size_t *dst_dimensions,
3442 1.1.1.2 mrg const size_t *src_dimensions,
3443 1.1.1.2 mrg struct gomp_device_descr *dst_devicep,
3444 1.1.1.2 mrg struct gomp_device_descr *src_devicep)
3445 1.1.1.2 mrg {
3446 1.1.1.2 mrg size_t dst_slice = element_size;
3447 1.1.1.2 mrg size_t src_slice = element_size;
3448 1.1.1.2 mrg size_t j, dst_off, src_off, length;
3449 1.1.1.2 mrg int i, ret;
3450 1.1.1.2 mrg
3451 1.1.1.2 mrg if (num_dims == 1)
3452 1.1.1.2 mrg {
3453 1.1.1.2 mrg if (__builtin_mul_overflow (element_size, volume[0], &length)
3454 1.1.1.2 mrg || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3455 1.1.1.2 mrg || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3456 1.1.1.2 mrg return EINVAL;
3457 1.1.1.2 mrg if (dst_devicep == NULL && src_devicep == NULL)
3458 1.1.1.3 mrg {
3459 1.1.1.7 mrg memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3460 1.1.1.7 mrg length);
3461 1.1.1.3 mrg ret = 1;
3462 1.1.1.3 mrg }
3463 1.1.1.2 mrg else if (src_devicep == NULL)
3464 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3465 1.1.1.3 mrg (char *) dst + dst_off,
3466 1.1.1.7 mrg (const char *) src + src_off,
3467 1.1.1.7 mrg length);
3468 1.1.1.2 mrg else if (dst_devicep == NULL)
3469 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id,
3470 1.1.1.3 mrg (char *) dst + dst_off,
3471 1.1.1.7 mrg (const char *) src + src_off,
3472 1.1.1.7 mrg length);
3473 1.1.1.2 mrg else if (src_devicep == dst_devicep)
3474 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id,
3475 1.1.1.3 mrg (char *) dst + dst_off,
3476 1.1.1.7 mrg (const char *) src + src_off,
3477 1.1.1.7 mrg length);
3478 1.1.1.2 mrg else
3479 1.1.1.3 mrg ret = 0;
3480 1.1.1.3 mrg return ret ? 0 : EINVAL;
3481 1.1.1.2 mrg }
3482 1.1.1.2 mrg
3483 1.1.1.2 mrg /* FIXME: it would be nice to have some plugin function to handle
3484 1.1.1.2 mrg num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3485 1.1.1.2 mrg be handled in the generic recursion below, and for host-host it
3486 1.1.1.2 mrg should be used even for any num_dims >= 2. */
3487 1.1.1.2 mrg
3488 1.1.1.2 mrg for (i = 1; i < num_dims; i++)
3489 1.1.1.2 mrg if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3490 1.1.1.2 mrg || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3491 1.1.1.2 mrg return EINVAL;
3492 1.1.1.2 mrg if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3493 1.1.1.2 mrg || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3494 1.1.1.2 mrg return EINVAL;
3495 1.1.1.2 mrg for (j = 0; j < volume[0]; j++)
3496 1.1.1.2 mrg {
3497 1.1.1.2 mrg ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3498 1.1.1.7 mrg (const char *) src + src_off,
3499 1.1.1.2 mrg element_size, num_dims - 1,
3500 1.1.1.2 mrg volume + 1, dst_offsets + 1,
3501 1.1.1.2 mrg src_offsets + 1, dst_dimensions + 1,
3502 1.1.1.2 mrg src_dimensions + 1, dst_devicep,
3503 1.1.1.2 mrg src_devicep);
3504 1.1.1.2 mrg if (ret)
3505 1.1.1.2 mrg return ret;
3506 1.1.1.2 mrg dst_off += dst_slice;
3507 1.1.1.2 mrg src_off += src_slice;
3508 1.1.1.2 mrg }
3509 1.1.1.2 mrg return 0;
3510 1.1.1.2 mrg }
3511 1.1.1.2 mrg
3512 1.1.1.2 mrg int
3513 1.1.1.7 mrg omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3514 1.1.1.2 mrg int num_dims, const size_t *volume,
3515 1.1.1.2 mrg const size_t *dst_offsets,
3516 1.1.1.2 mrg const size_t *src_offsets,
3517 1.1.1.2 mrg const size_t *dst_dimensions,
3518 1.1.1.2 mrg const size_t *src_dimensions,
3519 1.1.1.2 mrg int dst_device_num, int src_device_num)
3520 1.1.1.2 mrg {
3521 1.1.1.2 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3522 1.1.1.2 mrg
3523 1.1.1.2 mrg if (!dst && !src)
3524 1.1.1.2 mrg return INT_MAX;
3525 1.1.1.2 mrg
3526 1.1.1.10 mrg if (dst_device_num != gomp_get_num_devices ())
3527 1.1.1.2 mrg {
3528 1.1.1.2 mrg if (dst_device_num < 0)
3529 1.1.1.2 mrg return EINVAL;
3530 1.1.1.2 mrg
3531 1.1.1.2 mrg dst_devicep = resolve_device (dst_device_num);
3532 1.1.1.2 mrg if (dst_devicep == NULL)
3533 1.1.1.2 mrg return EINVAL;
3534 1.1.1.2 mrg
3535 1.1.1.2 mrg if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3536 1.1.1.2 mrg || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3537 1.1.1.2 mrg dst_devicep = NULL;
3538 1.1.1.2 mrg }
3539 1.1.1.10 mrg if (src_device_num != num_devices_openmp)
3540 1.1.1.2 mrg {
3541 1.1.1.2 mrg if (src_device_num < 0)
3542 1.1.1.2 mrg return EINVAL;
3543 1.1.1.2 mrg
3544 1.1.1.2 mrg src_devicep = resolve_device (src_device_num);
3545 1.1.1.2 mrg if (src_devicep == NULL)
3546 1.1.1.2 mrg return EINVAL;
3547 1.1.1.2 mrg
3548 1.1.1.2 mrg if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3549 1.1.1.2 mrg || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3550 1.1.1.2 mrg src_devicep = NULL;
3551 1.1.1.2 mrg }
3552 1.1.1.2 mrg
3553 1.1.1.2 mrg if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3554 1.1.1.2 mrg return EINVAL;
3555 1.1.1.2 mrg
3556 1.1.1.2 mrg if (src_devicep)
3557 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
3558 1.1.1.2 mrg else if (dst_devicep)
3559 1.1.1.2 mrg gomp_mutex_lock (&dst_devicep->lock);
3560 1.1.1.2 mrg int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3561 1.1.1.2 mrg volume, dst_offsets, src_offsets,
3562 1.1.1.2 mrg dst_dimensions, src_dimensions,
3563 1.1.1.2 mrg dst_devicep, src_devicep);
3564 1.1.1.2 mrg if (src_devicep)
3565 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
3566 1.1.1.2 mrg else if (dst_devicep)
3567 1.1.1.2 mrg gomp_mutex_unlock (&dst_devicep->lock);
3568 1.1.1.2 mrg return ret;
3569 1.1.1.2 mrg }
3570 1.1.1.2 mrg
3571 1.1.1.2 mrg int
3572 1.1.1.7 mrg omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3573 1.1.1.7 mrg size_t size, size_t device_offset, int device_num)
3574 1.1.1.2 mrg {
3575 1.1.1.10 mrg if (device_num == gomp_get_num_devices ())
3576 1.1.1.2 mrg return EINVAL;
3577 1.1.1.2 mrg
3578 1.1.1.2 mrg if (device_num < 0)
3579 1.1.1.2 mrg return EINVAL;
3580 1.1.1.2 mrg
3581 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
3582 1.1.1.2 mrg if (devicep == NULL)
3583 1.1.1.2 mrg return EINVAL;
3584 1.1.1.2 mrg
3585 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3586 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3587 1.1.1.2 mrg return EINVAL;
3588 1.1.1.2 mrg
3589 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
3590 1.1.1.2 mrg
3591 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
3592 1.1.1.2 mrg struct splay_tree_key_s cur_node;
3593 1.1.1.2 mrg int ret = EINVAL;
3594 1.1.1.2 mrg
3595 1.1.1.2 mrg cur_node.host_start = (uintptr_t) host_ptr;
3596 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + size;
3597 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3598 1.1.1.2 mrg if (n)
3599 1.1.1.2 mrg {
3600 1.1.1.2 mrg if (n->tgt->tgt_start + n->tgt_offset
3601 1.1.1.2 mrg == (uintptr_t) device_ptr + device_offset
3602 1.1.1.2 mrg && n->host_start <= cur_node.host_start
3603 1.1.1.2 mrg && n->host_end >= cur_node.host_end)
3604 1.1.1.2 mrg ret = 0;
3605 1.1.1.2 mrg }
3606 1.1.1.2 mrg else
3607 1.1.1.2 mrg {
3608 1.1.1.2 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3609 1.1.1.2 mrg tgt->array = gomp_malloc (sizeof (*tgt->array));
3610 1.1.1.2 mrg tgt->refcount = 1;
3611 1.1.1.2 mrg tgt->tgt_start = 0;
3612 1.1.1.2 mrg tgt->tgt_end = 0;
3613 1.1.1.2 mrg tgt->to_free = NULL;
3614 1.1.1.2 mrg tgt->prev = NULL;
3615 1.1.1.2 mrg tgt->list_count = 0;
3616 1.1.1.2 mrg tgt->device_descr = devicep;
3617 1.1.1.2 mrg splay_tree_node array = tgt->array;
3618 1.1.1.2 mrg splay_tree_key k = &array->key;
3619 1.1.1.2 mrg k->host_start = cur_node.host_start;
3620 1.1.1.2 mrg k->host_end = cur_node.host_end;
3621 1.1.1.2 mrg k->tgt = tgt;
3622 1.1.1.2 mrg k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3623 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
3624 1.1.1.7 mrg k->dynamic_refcount = 0;
3625 1.1.1.8 mrg k->aux = NULL;
3626 1.1.1.2 mrg array->left = NULL;
3627 1.1.1.2 mrg array->right = NULL;
3628 1.1.1.2 mrg splay_tree_insert (&devicep->mem_map, array);
3629 1.1.1.2 mrg ret = 0;
3630 1.1.1.2 mrg }
3631 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3632 1.1.1.2 mrg return ret;
3633 1.1.1.2 mrg }
3634 1.1.1.2 mrg
3635 1.1.1.2 mrg int
3636 1.1.1.7 mrg omp_target_disassociate_ptr (const void *ptr, int device_num)
3637 1.1.1.2 mrg {
3638 1.1.1.10 mrg if (device_num == gomp_get_num_devices ())
3639 1.1.1.2 mrg return EINVAL;
3640 1.1.1.2 mrg
3641 1.1.1.2 mrg if (device_num < 0)
3642 1.1.1.2 mrg return EINVAL;
3643 1.1.1.2 mrg
3644 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
3645 1.1.1.2 mrg if (devicep == NULL)
3646 1.1.1.2 mrg return EINVAL;
3647 1.1.1.2 mrg
3648 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3649 1.1.1.2 mrg return EINVAL;
3650 1.1.1.2 mrg
3651 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
3652 1.1.1.2 mrg
3653 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
3654 1.1.1.2 mrg struct splay_tree_key_s cur_node;
3655 1.1.1.2 mrg int ret = EINVAL;
3656 1.1.1.2 mrg
3657 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
3658 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
3659 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3660 1.1.1.2 mrg if (n
3661 1.1.1.2 mrg && n->host_start == cur_node.host_start
3662 1.1.1.2 mrg && n->refcount == REFCOUNT_INFINITY
3663 1.1.1.2 mrg && n->tgt->tgt_start == 0
3664 1.1.1.2 mrg && n->tgt->to_free == NULL
3665 1.1.1.2 mrg && n->tgt->refcount == 1
3666 1.1.1.2 mrg && n->tgt->list_count == 0)
3667 1.1.1.2 mrg {
3668 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, n);
3669 1.1.1.2 mrg gomp_unmap_tgt (n->tgt);
3670 1.1.1.2 mrg ret = 0;
3671 1.1.1.2 mrg }
3672 1.1.1.2 mrg
3673 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3674 1.1.1.2 mrg return ret;
3675 1.1.1.2 mrg }
3676 1.1.1.2 mrg
3677 1.1.1.7 mrg int
3678 1.1.1.7 mrg omp_pause_resource (omp_pause_resource_t kind, int device_num)
3679 1.1.1.7 mrg {
3680 1.1.1.7 mrg (void) kind;
3681 1.1.1.10 mrg if (device_num == gomp_get_num_devices ())
3682 1.1.1.7 mrg return gomp_pause_host ();
3683 1.1.1.10 mrg if (device_num < 0 || device_num >= num_devices_openmp)
3684 1.1.1.7 mrg return -1;
3685 1.1.1.7 mrg /* Do nothing for target devices for now. */
3686 1.1.1.7 mrg return 0;
3687 1.1.1.7 mrg }
3688 1.1.1.7 mrg
3689 1.1.1.7 mrg int
3690 1.1.1.7 mrg omp_pause_resource_all (omp_pause_resource_t kind)
3691 1.1.1.7 mrg {
3692 1.1.1.7 mrg (void) kind;
3693 1.1.1.7 mrg if (gomp_pause_host ())
3694 1.1.1.7 mrg return -1;
3695 1.1.1.7 mrg /* Do nothing for target devices for now. */
3696 1.1.1.7 mrg return 0;
3697 1.1.1.7 mrg }
3698 1.1.1.7 mrg
/* Create the internal (intra-library) aliases for the pause entry points
   so calls from within libgomp bind locally.  */
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
3701 1.1.1.7 mrg
3702 1.1 mrg #ifdef PLUGIN_SUPPORT
3703 1.1 mrg
/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    /* With defaulted offload targets a missing plugin is not an error:
       just report that no plugin was loaded, without a diagnostic.  */
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* The plugin must report exactly the interface version this libgomp
     was built against.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Entry points every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA interop handlers are all-or-nothing: count how many of
	 the four are present.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  /* Diagnose, unload the plugin if it was opened, and report failure.  */
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
3817 1.1.1.2 mrg
3818 1.1.1.2 mrg /* This function finalizes all initialized devices. */
3819 1.1.1.2 mrg
3820 1.1.1.2 mrg static void
3821 1.1.1.2 mrg gomp_target_fini (void)
3822 1.1.1.2 mrg {
3823 1.1.1.2 mrg int i;
3824 1.1.1.2 mrg for (i = 0; i < num_devices; i++)
3825 1.1 mrg {
3826 1.1.1.3 mrg bool ret = true;
3827 1.1.1.2 mrg struct gomp_device_descr *devicep = &devices[i];
3828 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
3829 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
3830 1.1.1.8 mrg ret = gomp_fini_device (devicep);
3831 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3832 1.1.1.3 mrg if (!ret)
3833 1.1.1.3 mrg gomp_fatal ("device finalization failed");
3834 1.1 mrg }
3835 1.1 mrg }
3836 1.1 mrg
3837 1.1.1.7 mrg /* This function initializes the runtime for offloading.
3838 1.1.1.7 mrg It parses the list of offload plugins, and tries to load these.
3839 1.1.1.7 mrg On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3840 1.1 mrg will be set, and the array DEVICES initialized, containing descriptors for
3841 1.1 mrg corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3842 1.1 mrg by the others. */
3843 1.1 mrg
3844 1.1 mrg static void
3845 1.1 mrg gomp_target_init (void)
3846 1.1 mrg {
3847 1.1 mrg const char *prefix ="libgomp-plugin-";
3848 1.1 mrg const char *suffix = SONAME_SUFFIX (1);
3849 1.1 mrg const char *cur, *next;
3850 1.1 mrg char *plugin_name;
3851 1.1.1.10 mrg int i, new_num_devs;
3852 1.1.1.10 mrg int num_devs = 0, num_devs_openmp;
3853 1.1.1.10 mrg struct gomp_device_descr *devs = NULL;
3854 1.1 mrg
3855 1.1.1.10 mrg if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3856 1.1.1.10 mrg return;
3857 1.1 mrg
3858 1.1.1.7 mrg cur = OFFLOAD_PLUGINS;
3859 1.1 mrg if (*cur)
3860 1.1 mrg do
3861 1.1 mrg {
3862 1.1 mrg struct gomp_device_descr current_device;
3863 1.1.1.6 mrg size_t prefix_len, suffix_len, cur_len;
3864 1.1 mrg
3865 1.1 mrg next = strchr (cur, ',');
3866 1.1 mrg
3867 1.1.1.6 mrg prefix_len = strlen (prefix);
3868 1.1.1.6 mrg cur_len = next ? next - cur : strlen (cur);
3869 1.1.1.6 mrg suffix_len = strlen (suffix);
3870 1.1.1.6 mrg
3871 1.1.1.6 mrg plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3872 1.1 mrg if (!plugin_name)
3873 1.1 mrg {
3874 1.1.1.10 mrg num_devs = 0;
3875 1.1 mrg break;
3876 1.1 mrg }
3877 1.1 mrg
3878 1.1.1.6 mrg memcpy (plugin_name, prefix, prefix_len);
3879 1.1.1.6 mrg memcpy (plugin_name + prefix_len, cur, cur_len);
3880 1.1.1.6 mrg memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3881 1.1 mrg
3882 1.1 mrg if (gomp_load_plugin_for_device (¤t_device, plugin_name))
3883 1.1 mrg {
3884 1.1.1.10 mrg new_num_devs = current_device.get_num_devices_func ();
3885 1.1.1.10 mrg if (new_num_devs >= 1)
3886 1.1 mrg {
3887 1.1 mrg /* Augment DEVICES and NUM_DEVICES. */
3888 1.1 mrg
3889 1.1.1.10 mrg devs = realloc (devs, (num_devs + new_num_devs)
3890 1.1.1.10 mrg * sizeof (struct gomp_device_descr));
3891 1.1.1.10 mrg if (!devs)
3892 1.1 mrg {
3893 1.1.1.10 mrg num_devs = 0;
3894 1.1 mrg free (plugin_name);
3895 1.1 mrg break;
3896 1.1 mrg }
3897 1.1 mrg
3898 1.1 mrg current_device.name = current_device.get_name_func ();
3899 1.1 mrg /* current_device.capabilities has already been set. */
3900 1.1 mrg current_device.type = current_device.get_type_func ();
3901 1.1 mrg current_device.mem_map.root = NULL;
3902 1.1.1.2 mrg current_device.state = GOMP_DEVICE_UNINITIALIZED;
3903 1.1.1.10 mrg for (i = 0; i < new_num_devs; i++)
3904 1.1 mrg {
3905 1.1 mrg current_device.target_id = i;
3906 1.1.1.10 mrg devs[num_devs] = current_device;
3907 1.1.1.10 mrg gomp_mutex_init (&devs[num_devs].lock);
3908 1.1.1.10 mrg num_devs++;
3909 1.1 mrg }
3910 1.1 mrg }
3911 1.1 mrg }
3912 1.1 mrg
3913 1.1 mrg free (plugin_name);
3914 1.1 mrg cur = next + 1;
3915 1.1 mrg }
3916 1.1 mrg while (next);
3917 1.1 mrg
3918 1.1 mrg /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3919 1.1 mrg NUM_DEVICES_OPENMP. */
3920 1.1.1.10 mrg struct gomp_device_descr *devs_s
3921 1.1.1.10 mrg = malloc (num_devs * sizeof (struct gomp_device_descr));
3922 1.1.1.10 mrg if (!devs_s)
3923 1.1.1.10 mrg {
3924 1.1.1.10 mrg num_devs = 0;
3925 1.1.1.10 mrg free (devs);
3926 1.1.1.10 mrg devs = NULL;
3927 1.1.1.10 mrg }
3928 1.1.1.10 mrg num_devs_openmp = 0;
3929 1.1.1.10 mrg for (i = 0; i < num_devs; i++)
3930 1.1.1.10 mrg if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3931 1.1.1.10 mrg devs_s[num_devs_openmp++] = devs[i];
3932 1.1.1.10 mrg int num_devs_after_openmp = num_devs_openmp;
3933 1.1.1.10 mrg for (i = 0; i < num_devs; i++)
3934 1.1.1.10 mrg if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3935 1.1.1.10 mrg devs_s[num_devs_after_openmp++] = devs[i];
3936 1.1.1.10 mrg free (devs);
3937 1.1.1.10 mrg devs = devs_s;
3938 1.1 mrg
3939 1.1.1.10 mrg for (i = 0; i < num_devs; i++)
3940 1.1 mrg {
3941 1.1 mrg /* The 'devices' array can be moved (by the realloc call) until we have
3942 1.1 mrg found all the plugins, so registering with the OpenACC runtime (which
3943 1.1 mrg takes a copy of the pointer argument) must be delayed until now. */
3944 1.1.1.10 mrg if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3945 1.1.1.10 mrg goacc_register (&devs[i]);
3946 1.1 mrg }
3947 1.1.1.2 mrg
3948 1.1.1.10 mrg num_devices = num_devs;
3949 1.1.1.10 mrg num_devices_openmp = num_devs_openmp;
3950 1.1.1.10 mrg devices = devs;
3951 1.1.1.2 mrg if (atexit (gomp_target_fini) != 0)
3952 1.1.1.2 mrg gomp_fatal ("atexit failed");
3953 1.1 mrg }
3954 1.1 mrg
3955 1.1 mrg #else /* PLUGIN_SUPPORT */
3956 1.1 mrg /* If dlfcn.h is unavailable we always fallback to host execution.
3957 1.1 mrg GOMP_target* routines are just stubs for this case. */
static void
gomp_target_init (void)
{
  /* Intentionally empty: without plugin support there are no devices
     to discover, so all offloading falls back to the host.  */
}
3962 1.1 mrg #endif /* PLUGIN_SUPPORT */
3963