/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2 1.1 mrg Contributed by Jakub Jelinek <jakub (at) redhat.com>.
3 1.1 mrg
4 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.1 mrg (libgomp).
6 1.1 mrg
7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1 mrg more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg /* This file contains the support of offloading. */
27 1.1 mrg
28 1.1 mrg #include "libgomp.h"
29 1.1 mrg #include "oacc-plugin.h"
30 1.1 mrg #include "oacc-int.h"
31 1.1 mrg #include "gomp-constants.h"
32 1.1 mrg #include <limits.h>
33 1.1 mrg #include <stdbool.h>
34 1.1 mrg #include <stdlib.h>
35 1.1 mrg #ifdef HAVE_INTTYPES_H
36 1.1 mrg # include <inttypes.h> /* For PRIu64. */
37 1.1 mrg #endif
38 1.1 mrg #include <string.h>
39 1.1 mrg #include <assert.h>
40 1.1.1.2 mrg #include <errno.h>
41 1.1 mrg
42 1.1 mrg #ifdef PLUGIN_SUPPORT
43 1.1 mrg #include <dlfcn.h>
44 1.1 mrg #include "plugin-suffix.h"
45 1.1 mrg #endif
46 1.1 mrg
47 1.1.1.8 mrg #define FIELD_TGT_EMPTY (~(size_t) 0)
48 1.1.1.8 mrg
49 1.1 mrg static void gomp_target_init (void);
50 1.1 mrg
/* The whole initialization code for offloading plugins is only run once.  */
52 1.1 mrg static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
53 1.1 mrg
54 1.1 mrg /* Mutex for offload image registration. */
55 1.1 mrg static gomp_mutex_t register_lock;
56 1.1 mrg
/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  /* Plugin interface version the image was registered with.  */
  unsigned version;
  /* Kind of offload target this image is for.  */
  enum offload_target_type type;
  /* Host-side table descriptor (opaque here; interpreted by the plugin).  */
  const void *host_table;
  /* Target-specific image data (opaque here; interpreted by the plugin).  */
  const void *target_data;
};
66 1.1 mrg
67 1.1 mrg /* Array of descriptors of offload images. */
68 1.1 mrg static struct offload_image_descr *offload_images;
69 1.1 mrg
70 1.1 mrg /* Total number of offload images. */
71 1.1 mrg static int num_offload_images;
72 1.1 mrg
73 1.1 mrg /* Array of descriptors for all available devices. */
74 1.1 mrg static struct gomp_device_descr *devices;
75 1.1 mrg
76 1.1 mrg /* Total number of available devices. */
77 1.1 mrg static int num_devices;
78 1.1 mrg
79 1.1 mrg /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 1.1 mrg static int num_devices_openmp;
81 1.1 mrg
82 1.1 mrg /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
83 1.1 mrg
84 1.1 mrg static void *
85 1.1 mrg gomp_realloc_unlock (void *old, size_t size)
86 1.1 mrg {
87 1.1 mrg void *ret = realloc (old, size);
88 1.1 mrg if (ret == NULL)
89 1.1 mrg {
90 1.1 mrg gomp_mutex_unlock (®ister_lock);
91 1.1 mrg gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
92 1.1 mrg }
93 1.1 mrg return ret;
94 1.1 mrg }
95 1.1 mrg
96 1.1 mrg attribute_hidden void
97 1.1 mrg gomp_init_targets_once (void)
98 1.1 mrg {
99 1.1 mrg (void) pthread_once (&gomp_is_initialized, gomp_target_init);
100 1.1 mrg }
101 1.1 mrg
102 1.1 mrg attribute_hidden int
103 1.1 mrg gomp_get_num_devices (void)
104 1.1 mrg {
105 1.1 mrg gomp_init_targets_once ();
106 1.1 mrg return num_devices_openmp;
107 1.1 mrg }
108 1.1 mrg
109 1.1 mrg static struct gomp_device_descr *
110 1.1 mrg resolve_device (int device_id)
111 1.1 mrg {
112 1.1 mrg if (device_id == GOMP_DEVICE_ICV)
113 1.1 mrg {
114 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
115 1.1 mrg device_id = icv->default_device_var;
116 1.1 mrg }
117 1.1 mrg
118 1.1 mrg if (device_id < 0 || device_id >= gomp_get_num_devices ())
119 1.1 mrg return NULL;
120 1.1 mrg
121 1.1.1.2 mrg gomp_mutex_lock (&devices[device_id].lock);
122 1.1.1.2 mrg if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
123 1.1.1.2 mrg gomp_init_device (&devices[device_id]);
124 1.1.1.2 mrg else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
125 1.1.1.2 mrg {
126 1.1.1.2 mrg gomp_mutex_unlock (&devices[device_id].lock);
127 1.1.1.2 mrg return NULL;
128 1.1.1.2 mrg }
129 1.1.1.2 mrg gomp_mutex_unlock (&devices[device_id].lock);
130 1.1.1.2 mrg
131 1.1 mrg return &devices[device_id];
132 1.1 mrg }
133 1.1 mrg
134 1.1 mrg
/* Look KEY up in MEM_MAP.  A key with non-zero length is a plain splay
   tree lookup.  For a zero-length key (host_start == host_end) try, in
   order: a mapping containing host_start (extend the key one byte to the
   right), a mapping ending at host_start (extend one byte to the left),
   and finally an exact zero-length match.  KEY's fields are temporarily
   mutated but always restored before returning.  */

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* First try a mapping covering [host_start, host_start + 1).  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* Next, one covering [host_start - 1, host_start).  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  /* Finally, an exact zero-length mapping.  */
  return splay_tree_lookup (mem_map, key);
}
153 1.1.1.2 mrg
154 1.1.1.2 mrg static inline splay_tree_key
155 1.1.1.2 mrg gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
156 1.1.1.2 mrg {
157 1.1.1.2 mrg if (key->host_start != key->host_end)
158 1.1.1.2 mrg return splay_tree_lookup (mem_map, key);
159 1.1.1.2 mrg
160 1.1.1.2 mrg key->host_end++;
161 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, key);
162 1.1.1.2 mrg key->host_end--;
163 1.1.1.2 mrg return n;
164 1.1.1.2 mrg }
165 1.1.1.2 mrg
166 1.1.1.3 mrg static inline void
167 1.1.1.3 mrg gomp_device_copy (struct gomp_device_descr *devicep,
168 1.1.1.3 mrg bool (*copy_func) (int, void *, const void *, size_t),
169 1.1.1.3 mrg const char *dst, void *dstaddr,
170 1.1.1.3 mrg const char *src, const void *srcaddr,
171 1.1.1.3 mrg size_t size)
172 1.1.1.3 mrg {
173 1.1.1.3 mrg if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
174 1.1.1.3 mrg {
175 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
176 1.1.1.3 mrg gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 1.1.1.3 mrg src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
178 1.1.1.3 mrg }
179 1.1.1.3 mrg }
180 1.1.1.3 mrg
181 1.1.1.8 mrg static inline void
182 1.1.1.8 mrg goacc_device_copy_async (struct gomp_device_descr *devicep,
183 1.1.1.8 mrg bool (*copy_func) (int, void *, const void *, size_t,
184 1.1.1.8 mrg struct goacc_asyncqueue *),
185 1.1.1.8 mrg const char *dst, void *dstaddr,
186 1.1.1.8 mrg const char *src, const void *srcaddr,
187 1.1.1.8 mrg size_t size, struct goacc_asyncqueue *aq)
188 1.1.1.8 mrg {
189 1.1.1.8 mrg if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
190 1.1.1.8 mrg {
191 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
192 1.1.1.8 mrg gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 1.1.1.8 mrg src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
194 1.1.1.8 mrg }
195 1.1.1.8 mrg }
196 1.1.1.8 mrg
197 1.1.1.6 mrg /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
198 1.1.1.6 mrg host to device memory transfers. */
199 1.1.1.6 mrg
/* One contiguous span of device memory scheduled for a coalesced
   host-to-device transfer; offsets are relative to the owning
   target_mem_desc's tgt_start device address.  */
struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};
205 1.1.1.7 mrg
/* State for coalescing several small host-to-device copies into fewer,
   larger transfers; consulted by gomp_copy_host2dev, built up via
   gomp_coalesce_buf_add.  */
struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* The target memory descriptor whose tgt_start all chunk offsets are
     relative to.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
224 1.1.1.6 mrg
225 1.1.1.6 mrg /* Maximum size of memory region considered for coalescing. Larger copies
226 1.1.1.6 mrg are performed directly. */
227 1.1.1.6 mrg #define MAX_COALESCE_BUF_SIZE (32 * 1024)
228 1.1.1.6 mrg
229 1.1.1.6 mrg /* Maximum size of a gap in between regions to consider them being copied
230 1.1.1.6 mrg within the same chunk. All the device offsets considered are within
231 1.1.1.6 mrg newly allocated device memory, so it isn't fatal if we copy some padding
232 1.1.1.6 mrg in between from host to device. The gaps come either from alignment
233 1.1.1.6 mrg padding or from memory regions which are not supposed to be copied from
234 1.1.1.6 mrg host to device (e.g. map(alloc:), map(from:) etc.). */
235 1.1.1.6 mrg #define MAX_COALESCE_BUF_GAP (4 * 1024)
236 1.1.1.6 mrg
237 1.1.1.6 mrg /* Add region with device tgt_start relative offset and length to CBUF. */
238 1.1.1.6 mrg
239 1.1.1.6 mrg static inline void
240 1.1.1.6 mrg gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
241 1.1.1.6 mrg {
242 1.1.1.6 mrg if (len > MAX_COALESCE_BUF_SIZE || len == 0)
243 1.1.1.6 mrg return;
244 1.1.1.6 mrg if (cbuf->chunk_cnt)
245 1.1.1.6 mrg {
246 1.1.1.6 mrg if (cbuf->chunk_cnt < 0)
247 1.1.1.6 mrg return;
248 1.1.1.7 mrg if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
249 1.1.1.6 mrg {
250 1.1.1.6 mrg cbuf->chunk_cnt = -1;
251 1.1.1.6 mrg return;
252 1.1.1.6 mrg }
253 1.1.1.7 mrg if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
254 1.1.1.6 mrg {
255 1.1.1.7 mrg cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
256 1.1.1.6 mrg cbuf->use_cnt++;
257 1.1.1.6 mrg return;
258 1.1.1.6 mrg }
259 1.1.1.6 mrg /* If the last chunk is only used by one mapping, discard it,
260 1.1.1.6 mrg as it will be one host to device copy anyway and
261 1.1.1.6 mrg memcpying it around will only waste cycles. */
262 1.1.1.6 mrg if (cbuf->use_cnt == 1)
263 1.1.1.6 mrg cbuf->chunk_cnt--;
264 1.1.1.6 mrg }
265 1.1.1.7 mrg cbuf->chunks[cbuf->chunk_cnt].start = start;
266 1.1.1.7 mrg cbuf->chunks[cbuf->chunk_cnt].end = start + len;
267 1.1.1.6 mrg cbuf->chunk_cnt++;
268 1.1.1.6 mrg cbuf->use_cnt = 1;
269 1.1.1.6 mrg }
270 1.1.1.6 mrg
271 1.1.1.6 mrg /* Return true for mapping kinds which need to copy data from the
272 1.1.1.6 mrg host to device for regions that weren't previously mapped. */
273 1.1.1.6 mrg
274 1.1.1.6 mrg static inline bool
275 1.1.1.6 mrg gomp_to_device_kind_p (int kind)
276 1.1.1.6 mrg {
277 1.1.1.6 mrg switch (kind)
278 1.1.1.6 mrg {
279 1.1.1.6 mrg case GOMP_MAP_ALLOC:
280 1.1.1.6 mrg case GOMP_MAP_FROM:
281 1.1.1.6 mrg case GOMP_MAP_FORCE_ALLOC:
282 1.1.1.8 mrg case GOMP_MAP_FORCE_FROM:
283 1.1.1.6 mrg case GOMP_MAP_ALWAYS_FROM:
284 1.1.1.6 mrg return false;
285 1.1.1.6 mrg default:
286 1.1.1.6 mrg return true;
287 1.1.1.6 mrg }
288 1.1.1.6 mrg }
289 1.1.1.6 mrg
/* Copy SZ bytes from host address H to device address D on DEVICEP.  If
   AQ is non-NULL the copy is queued on that OpenACC asyncqueue; otherwise
   it is performed synchronously.  If CBUF is non-NULL and the destination
   lies entirely within one of its recorded chunks, the data is only
   staged into CBUF's host-side buffer here; the combined chunk is
   transferred to the device later by the caller.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      /* Destination offset relative to the newly allocated device block.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF; chunks are kept
	     sorted and non-overlapping by gomp_coalesce_buf_add.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* Found the chunk; the region must not spill past its
		     end, or the chunk bookkeeping is broken.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  /* Stage into the host-side buffer; chunks[0].start is
		     the buffer's base offset.  */
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  /* Not coalesced: go straight to the device, async if AQ is given.  */
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
327 1.1.1.3 mrg
328 1.1.1.8 mrg attribute_hidden void
329 1.1.1.3 mrg gomp_copy_dev2host (struct gomp_device_descr *devicep,
330 1.1.1.8 mrg struct goacc_asyncqueue *aq,
331 1.1.1.3 mrg void *h, const void *d, size_t sz)
332 1.1.1.3 mrg {
333 1.1.1.8 mrg if (__builtin_expect (aq != NULL, 0))
334 1.1.1.8 mrg goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
335 1.1.1.8 mrg "host", h, "dev", d, sz, aq);
336 1.1.1.8 mrg else
337 1.1.1.8 mrg gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
338 1.1.1.3 mrg }
339 1.1.1.3 mrg
340 1.1.1.3 mrg static void
341 1.1.1.3 mrg gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
342 1.1.1.3 mrg {
343 1.1.1.3 mrg if (!devicep->free_func (devicep->target_id, devptr))
344 1.1.1.3 mrg {
345 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
346 1.1.1.3 mrg gomp_fatal ("error in freeing device memory block at %p", devptr);
347 1.1.1.3 mrg }
348 1.1.1.3 mrg }
349 1.1.1.3 mrg
350 1.1.1.2 mrg /* Handle the case where gomp_map_lookup, splay_tree_lookup or
351 1.1.1.2 mrg gomp_map_0len_lookup found oldn for newn.
352 1.1 mrg Helper function of gomp_map_vars. */
353 1.1 mrg
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found OLDN for NEWN: fill in TGT_VAR from OLDN,
   verify NEWN lies entirely inside OLDN (a forced remap or an
   out-of-bounds request is fatal), refresh the device copy for
   always-to kinds, and bump OLDN's reference count.
   Helper function of gomp_map_vars.  Called with DEVICEP's lock held.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  /* GOMP_MAP_ATTACH is handled by gomp_attach_pointer, never here.  */
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      /* Release the device lock before the fatal error.  */
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  /* Always-to kinds re-copy host data even for already-mapped regions.  */
  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
390 1.1 mrg
/* Return the IDX-th mapping kind from KINDS, which is an array of
   unsigned shorts when SHORT_MAPKIND is true and of unsigned chars
   otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
397 1.1.1.2 mrg
/* Initialize the device pointer slot at TGT->tgt_start + TARGET_OFFSET
   with the device address corresponding to host pointer HOST_PTR.  BIAS
   is added before looking the pointer target up in the memory map and
   subtracted from the resulting device address.  A NULL host pointer
   yields a NULL device pointer; an unmapped pointer target is fatal.
   AQ and CBUF are forwarded to gomp_copy_host2dev.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* A NULL host pointer maps to a NULL device pointer.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  /* Translate the biased host address into the device address space.  */
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
436 1.1.1.2 mrg
/* Map the I-th element of a GOMP_MAP_STRUCT group whose enclosing struct
   mapping N already exists on the device.  FIRST is the index of the
   group's first element, used as a lower bound for the zero-length
   lookback.  The element must belong to the same existing mapping as N
   (same tgt, consistent host/device offset); otherwise this is fatal.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  /* Struct element groups always use the short (16-bit) map-kind
     encoding — see get_kind.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-length element: also accept a mapping that merely ends at
	 host_start, by probing one byte to the left (but not before the
	 group's first element)...  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      /* ... or one that starts at host_start, by probing one byte to the
	 right.  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
498 1.1.1.2 mrg
/* Attach the pointer at host address ATTACH_TO, which lies inside the
   mapped block N, on the device: write the device address of the
   pointer's (BIAS-adjusted) target into the corresponding device-side
   pointer slot.  A per-slot attachment count is kept in N->aux; only the
   first attach performs the device write, later ones just increment the
   count.  Called with DEVICEP's lock held; AQ and CBUFP are forwarded to
   gomp_copy_host2dev.  */

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  /* Number of pointer-sized slots in the mapped block, rounded up.  */
  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  /* Lazily allocate one attachment counter per pointer-sized slot.  */
  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      /* First attachment: compute the device address of the pointer slot
	 and the device address of the pointer's target, then write the
	 latter into the former.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      /* Look up the (biased) pointer target in the device memory map.  */
      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      /* Device address corresponding to the (unbiased) host target.  */
      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
573 1.1.1.8 mrg
/* Undo gomp_attach_pointer for the pointer at host address DETACH_FROM
   inside mapped block N: decrement the slot's attachment count and, when
   it reaches zero, write the original host pointer value back into the
   device-side slot.  With FINALIZE, the count is forced to 1 first so
   this detach is guaranteed to be the last.  Called with DEVICEP's lock
   held; AQ and CBUFP are forwarded to gomp_copy_host2dev.  */

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  /* Index of the pointer-sized slot within the block (matches the
     indexing used by gomp_attach_pointer).  */
  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      /* Last detachment: restore the host pointer value into the device
	 copy of the pointer slot.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
626 1.1.1.8 mrg
627 1.1.1.8 mrg attribute_hidden uintptr_t
628 1.1.1.2 mrg gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
629 1.1.1.2 mrg {
630 1.1.1.2 mrg if (tgt->list[i].key != NULL)
631 1.1.1.2 mrg return tgt->list[i].key->tgt->tgt_start
632 1.1.1.2 mrg + tgt->list[i].key->tgt_offset
633 1.1.1.2 mrg + tgt->list[i].offset;
634 1.1.1.8 mrg
635 1.1.1.8 mrg switch (tgt->list[i].offset)
636 1.1.1.8 mrg {
637 1.1.1.8 mrg case OFFSET_INLINED:
638 1.1.1.8 mrg return (uintptr_t) hostaddrs[i];
639 1.1.1.8 mrg
640 1.1.1.8 mrg case OFFSET_POINTER:
641 1.1.1.8 mrg return 0;
642 1.1.1.8 mrg
643 1.1.1.8 mrg case OFFSET_STRUCT:
644 1.1.1.8 mrg return tgt->list[i + 1].key->tgt->tgt_start
645 1.1.1.8 mrg + tgt->list[i + 1].key->tgt_offset
646 1.1.1.8 mrg + tgt->list[i + 1].offset
647 1.1.1.8 mrg + (uintptr_t) hostaddrs[i]
648 1.1.1.8 mrg - (uintptr_t) hostaddrs[i + 1];
649 1.1.1.8 mrg
650 1.1.1.8 mrg default:
651 1.1.1.8 mrg return tgt->tgt_start + tgt->list[i].offset;
652 1.1.1.8 mrg }
653 1.1 mrg }
654 1.1 mrg
655 1.1.1.8 mrg static inline __attribute__((always_inline)) struct target_mem_desc *
656 1.1.1.8 mrg gomp_map_vars_internal (struct gomp_device_descr *devicep,
657 1.1.1.8 mrg struct goacc_asyncqueue *aq, size_t mapnum,
658 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes,
659 1.1.1.8 mrg void *kinds, bool short_mapkind,
660 1.1.1.8 mrg enum gomp_map_vars_kind pragma_kind)
661 1.1 mrg {
662 1.1 mrg size_t i, tgt_align, tgt_size, not_found_cnt = 0;
663 1.1.1.2 mrg bool has_firstprivate = false;
664 1.1.1.2 mrg const int rshift = short_mapkind ? 8 : 3;
665 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
666 1.1 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
667 1.1 mrg struct splay_tree_key_s cur_node;
668 1.1 mrg struct target_mem_desc *tgt
669 1.1 mrg = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
670 1.1 mrg tgt->list_count = mapnum;
671 1.1.1.2 mrg tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
672 1.1 mrg tgt->device_descr = devicep;
673 1.1.1.8 mrg tgt->prev = NULL;
674 1.1.1.6 mrg struct gomp_coalesce_buf cbuf, *cbufp = NULL;
675 1.1 mrg
676 1.1 mrg if (mapnum == 0)
677 1.1.1.2 mrg {
678 1.1.1.2 mrg tgt->tgt_start = 0;
679 1.1.1.2 mrg tgt->tgt_end = 0;
680 1.1.1.2 mrg return tgt;
681 1.1.1.2 mrg }
682 1.1 mrg
683 1.1 mrg tgt_align = sizeof (void *);
684 1.1 mrg tgt_size = 0;
685 1.1.1.6 mrg cbuf.chunks = NULL;
686 1.1.1.6 mrg cbuf.chunk_cnt = -1;
687 1.1.1.6 mrg cbuf.use_cnt = 0;
688 1.1.1.6 mrg cbuf.buf = NULL;
689 1.1.1.6 mrg if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
690 1.1.1.6 mrg {
691 1.1.1.7 mrg size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
692 1.1.1.7 mrg cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
693 1.1.1.6 mrg cbuf.chunk_cnt = 0;
694 1.1.1.6 mrg }
695 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
696 1.1 mrg {
697 1.1 mrg size_t align = 4 * sizeof (void *);
698 1.1 mrg tgt_align = align;
699 1.1 mrg tgt_size = mapnum * sizeof (void *);
700 1.1.1.6 mrg cbuf.chunk_cnt = 1;
701 1.1.1.6 mrg cbuf.use_cnt = 1 + (mapnum > 1);
702 1.1.1.7 mrg cbuf.chunks[0].start = 0;
703 1.1.1.7 mrg cbuf.chunks[0].end = tgt_size;
704 1.1 mrg }
705 1.1 mrg
706 1.1 mrg gomp_mutex_lock (&devicep->lock);
707 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
708 1.1.1.2 mrg {
709 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
710 1.1.1.2 mrg free (tgt);
711 1.1.1.2 mrg return NULL;
712 1.1.1.2 mrg }
713 1.1 mrg
714 1.1 mrg for (i = 0; i < mapnum; i++)
715 1.1 mrg {
716 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
717 1.1.1.2 mrg if (hostaddrs[i] == NULL
718 1.1.1.2 mrg || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
719 1.1.1.2 mrg {
720 1.1.1.2 mrg tgt->list[i].key = NULL;
721 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED;
722 1.1.1.2 mrg continue;
723 1.1.1.2 mrg }
724 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
725 1.1.1.8 mrg || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
726 1.1.1.2 mrg {
727 1.1.1.8 mrg tgt->list[i].key = NULL;
728 1.1.1.8 mrg if (!not_found_cnt)
729 1.1.1.2 mrg {
730 1.1.1.8 mrg /* In OpenMP < 5.0 and OpenACC the mapping has to be done
731 1.1.1.8 mrg on a separate construct prior to using use_device_{addr,ptr}.
732 1.1.1.8 mrg In OpenMP 5.0, map directives need to be ordered by the
733 1.1.1.8 mrg middle-end before the use_device_* clauses. If
734 1.1.1.8 mrg !not_found_cnt, all mappings requested (if any) are already
735 1.1.1.8 mrg mapped, so use_device_{addr,ptr} can be resolved right away.
736 1.1.1.8 mrg Otherwise, if not_found_cnt, gomp_map_lookup might fail
737 1.1.1.8 mrg now but would succeed after performing the mappings in the
738 1.1.1.8 mrg following loop. We can't defer this always to the second
739 1.1.1.8 mrg loop, because it is not even invoked when !not_found_cnt
740 1.1.1.8 mrg after the first loop. */
741 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
742 1.1.1.8 mrg cur_node.host_end = cur_node.host_start;
743 1.1.1.8 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
744 1.1.1.8 mrg if (n != NULL)
745 1.1.1.8 mrg {
746 1.1.1.8 mrg cur_node.host_start -= n->host_start;
747 1.1.1.8 mrg hostaddrs[i]
748 1.1.1.8 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset
749 1.1.1.8 mrg + cur_node.host_start);
750 1.1.1.8 mrg }
751 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
752 1.1.1.8 mrg {
753 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
754 1.1.1.8 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped");
755 1.1.1.8 mrg }
756 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
757 1.1.1.8 mrg /* If not present, continue using the host address. */
758 1.1.1.8 mrg ;
759 1.1.1.8 mrg else
760 1.1.1.8 mrg __builtin_unreachable ();
761 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED;
762 1.1.1.2 mrg }
763 1.1.1.8 mrg else
764 1.1.1.8 mrg tgt->list[i].offset = 0;
765 1.1.1.2 mrg continue;
766 1.1.1.2 mrg }
767 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_STRUCT)
768 1.1.1.2 mrg {
769 1.1.1.2 mrg size_t first = i + 1;
770 1.1.1.2 mrg size_t last = i + sizes[i];
771 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
772 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
773 1.1.1.2 mrg + sizes[last];
774 1.1.1.2 mrg tgt->list[i].key = NULL;
775 1.1.1.8 mrg tgt->list[i].offset = OFFSET_STRUCT;
776 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
777 1.1.1.2 mrg if (n == NULL)
778 1.1.1.2 mrg {
779 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
780 1.1.1.2 mrg if (tgt_align < align)
781 1.1.1.2 mrg tgt_align = align;
782 1.1.1.6 mrg tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
783 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
784 1.1.1.6 mrg tgt_size += cur_node.host_end - cur_node.host_start;
785 1.1.1.2 mrg not_found_cnt += last - i;
786 1.1.1.2 mrg for (i = first; i <= last; i++)
787 1.1.1.6 mrg {
788 1.1.1.6 mrg tgt->list[i].key = NULL;
789 1.1.1.6 mrg if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
790 1.1.1.6 mrg & typemask))
791 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf,
792 1.1.1.6 mrg tgt_size - cur_node.host_end
793 1.1.1.6 mrg + (uintptr_t) hostaddrs[i],
794 1.1.1.6 mrg sizes[i]);
795 1.1.1.6 mrg }
796 1.1.1.2 mrg i--;
797 1.1.1.2 mrg continue;
798 1.1.1.2 mrg }
799 1.1.1.2 mrg for (i = first; i <= last; i++)
800 1.1.1.8 mrg gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
801 1.1.1.6 mrg sizes, kinds, NULL);
802 1.1.1.2 mrg i--;
803 1.1.1.2 mrg continue;
804 1.1.1.2 mrg }
805 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
806 1.1 mrg {
807 1.1.1.2 mrg tgt->list[i].key = NULL;
808 1.1.1.8 mrg tgt->list[i].offset = OFFSET_POINTER;
809 1.1.1.8 mrg has_firstprivate = true;
810 1.1.1.8 mrg continue;
811 1.1.1.8 mrg }
812 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_ATTACH)
813 1.1.1.8 mrg {
814 1.1.1.8 mrg tgt->list[i].key = NULL;
815 1.1.1.2 mrg has_firstprivate = true;
816 1.1 mrg continue;
817 1.1 mrg }
818 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
819 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
820 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
821 1.1 mrg else
822 1.1 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
823 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
824 1.1.1.2 mrg {
825 1.1.1.2 mrg tgt->list[i].key = NULL;
826 1.1.1.2 mrg
827 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
828 1.1.1.2 mrg if (tgt_align < align)
829 1.1.1.2 mrg tgt_align = align;
830 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
831 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf, tgt_size,
832 1.1.1.6 mrg cur_node.host_end - cur_node.host_start);
833 1.1.1.2 mrg tgt_size += cur_node.host_end - cur_node.host_start;
834 1.1.1.2 mrg has_firstprivate = true;
835 1.1.1.2 mrg continue;
836 1.1.1.2 mrg }
837 1.1.1.2 mrg splay_tree_key n;
838 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
839 1.1 mrg {
840 1.1.1.2 mrg n = gomp_map_0len_lookup (mem_map, &cur_node);
841 1.1.1.2 mrg if (!n)
842 1.1.1.2 mrg {
843 1.1.1.2 mrg tgt->list[i].key = NULL;
844 1.1.1.8 mrg tgt->list[i].offset = OFFSET_POINTER;
845 1.1.1.2 mrg continue;
846 1.1.1.2 mrg }
847 1.1 mrg }
848 1.1 mrg else
849 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
850 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
851 1.1.1.8 mrg gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
852 1.1.1.6 mrg kind & typemask, NULL);
853 1.1.1.2 mrg else
854 1.1 mrg {
855 1.1.1.2 mrg tgt->list[i].key = NULL;
856 1.1 mrg
857 1.1.1.8 mrg if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
858 1.1.1.8 mrg {
859 1.1.1.8 mrg /* Not present, hence, skip entry - including its MAP_POINTER,
860 1.1.1.8 mrg when existing. */
861 1.1.1.8 mrg tgt->list[i].offset = OFFSET_POINTER;
862 1.1.1.8 mrg if (i + 1 < mapnum
863 1.1.1.8 mrg && ((typemask & get_kind (short_mapkind, kinds, i + 1))
864 1.1.1.8 mrg == GOMP_MAP_POINTER))
865 1.1.1.8 mrg {
866 1.1.1.8 mrg ++i;
867 1.1.1.8 mrg tgt->list[i].key = NULL;
868 1.1.1.8 mrg tgt->list[i].offset = 0;
869 1.1.1.8 mrg }
870 1.1.1.8 mrg continue;
871 1.1.1.8 mrg }
872 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
873 1.1 mrg not_found_cnt++;
874 1.1 mrg if (tgt_align < align)
875 1.1 mrg tgt_align = align;
876 1.1 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
877 1.1.1.6 mrg if (gomp_to_device_kind_p (kind & typemask))
878 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf, tgt_size,
879 1.1.1.6 mrg cur_node.host_end - cur_node.host_start);
880 1.1 mrg tgt_size += cur_node.host_end - cur_node.host_start;
881 1.1 mrg if ((kind & typemask) == GOMP_MAP_TO_PSET)
882 1.1 mrg {
883 1.1 mrg size_t j;
884 1.1 mrg for (j = i + 1; j < mapnum; j++)
885 1.1.1.2 mrg if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
886 1.1 mrg & typemask))
887 1.1 mrg break;
888 1.1 mrg else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
889 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
890 1.1 mrg > cur_node.host_end))
891 1.1 mrg break;
892 1.1 mrg else
893 1.1 mrg {
894 1.1.1.2 mrg tgt->list[j].key = NULL;
895 1.1 mrg i++;
896 1.1 mrg }
897 1.1 mrg }
898 1.1 mrg }
899 1.1 mrg }
900 1.1 mrg
901 1.1 mrg if (devaddrs)
902 1.1 mrg {
903 1.1 mrg if (mapnum != 1)
904 1.1 mrg {
905 1.1 mrg gomp_mutex_unlock (&devicep->lock);
906 1.1 mrg gomp_fatal ("unexpected aggregation");
907 1.1 mrg }
908 1.1 mrg tgt->to_free = devaddrs[0];
909 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
910 1.1 mrg tgt->tgt_end = tgt->tgt_start + sizes[0];
911 1.1 mrg }
912 1.1.1.2 mrg else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
913 1.1 mrg {
914 1.1 mrg /* Allocate tgt_align aligned tgt_size block of memory. */
915 1.1 mrg /* FIXME: Perhaps change interface to allocate properly aligned
916 1.1 mrg memory. */
917 1.1 mrg tgt->to_free = devicep->alloc_func (devicep->target_id,
918 1.1 mrg tgt_size + tgt_align - 1);
919 1.1.1.3 mrg if (!tgt->to_free)
920 1.1.1.3 mrg {
921 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
922 1.1.1.3 mrg gomp_fatal ("device memory allocation fail");
923 1.1.1.3 mrg }
924 1.1.1.3 mrg
925 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
926 1.1 mrg tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
927 1.1 mrg tgt->tgt_end = tgt->tgt_start + tgt_size;
928 1.1.1.6 mrg
929 1.1.1.6 mrg if (cbuf.use_cnt == 1)
930 1.1.1.6 mrg cbuf.chunk_cnt--;
931 1.1.1.6 mrg if (cbuf.chunk_cnt > 0)
932 1.1.1.6 mrg {
933 1.1.1.6 mrg cbuf.buf
934 1.1.1.7 mrg = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
935 1.1.1.6 mrg if (cbuf.buf)
936 1.1.1.6 mrg {
937 1.1.1.6 mrg cbuf.tgt = tgt;
938 1.1.1.6 mrg cbufp = &cbuf;
939 1.1.1.6 mrg }
940 1.1.1.6 mrg }
941 1.1 mrg }
942 1.1 mrg else
943 1.1 mrg {
944 1.1 mrg tgt->to_free = NULL;
945 1.1 mrg tgt->tgt_start = 0;
946 1.1 mrg tgt->tgt_end = 0;
947 1.1 mrg }
948 1.1 mrg
949 1.1 mrg tgt_size = 0;
950 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
951 1.1 mrg tgt_size = mapnum * sizeof (void *);
952 1.1 mrg
953 1.1 mrg tgt->array = NULL;
954 1.1.1.2 mrg if (not_found_cnt || has_firstprivate)
955 1.1 mrg {
956 1.1.1.2 mrg if (not_found_cnt)
957 1.1.1.2 mrg tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
958 1.1 mrg splay_tree_node array = tgt->array;
959 1.1.1.2 mrg size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
960 1.1.1.2 mrg uintptr_t field_tgt_base = 0;
961 1.1 mrg
962 1.1 mrg for (i = 0; i < mapnum; i++)
963 1.1.1.2 mrg if (tgt->list[i].key == NULL)
964 1.1 mrg {
965 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
966 1.1 mrg if (hostaddrs[i] == NULL)
967 1.1 mrg continue;
968 1.1.1.2 mrg switch (kind & typemask)
969 1.1.1.2 mrg {
970 1.1.1.2 mrg size_t align, len, first, last;
971 1.1.1.2 mrg splay_tree_key n;
972 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE:
973 1.1.1.2 mrg align = (size_t) 1 << (kind >> rshift);
974 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
975 1.1.1.2 mrg tgt->list[i].offset = tgt_size;
976 1.1.1.2 mrg len = sizes[i];
977 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
978 1.1.1.3 mrg (void *) (tgt->tgt_start + tgt_size),
979 1.1.1.6 mrg (void *) hostaddrs[i], len, cbufp);
980 1.1.1.2 mrg tgt_size += len;
981 1.1.1.2 mrg continue;
982 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE_INT:
983 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
984 1.1.1.2 mrg continue;
985 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
986 1.1.1.8 mrg /* The OpenACC 'host_data' construct only allows 'use_device'
987 1.1.1.8 mrg "mapping" clauses, so in the first loop, 'not_found_cnt'
988 1.1.1.8 mrg must always have been zero, so all OpenACC 'use_device'
989 1.1.1.8 mrg clauses have already been handled. (We can only easily test
990 1.1.1.8 mrg 'use_device' with 'if_present' clause here.) */
991 1.1.1.8 mrg assert (tgt->list[i].offset == OFFSET_INLINED);
992 1.1.1.8 mrg /* Nevertheless, FALLTHRU to the normal handling, to keep the
993 1.1.1.8 mrg code conceptually simple, similar to the first loop. */
994 1.1.1.8 mrg case GOMP_MAP_USE_DEVICE_PTR:
995 1.1.1.8 mrg if (tgt->list[i].offset == 0)
996 1.1.1.8 mrg {
997 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
998 1.1.1.8 mrg cur_node.host_end = cur_node.host_start;
999 1.1.1.8 mrg n = gomp_map_lookup (mem_map, &cur_node);
1000 1.1.1.8 mrg if (n != NULL)
1001 1.1.1.8 mrg {
1002 1.1.1.8 mrg cur_node.host_start -= n->host_start;
1003 1.1.1.8 mrg hostaddrs[i]
1004 1.1.1.8 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset
1005 1.1.1.8 mrg + cur_node.host_start);
1006 1.1.1.8 mrg }
1007 1.1.1.8 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1008 1.1.1.8 mrg {
1009 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
1010 1.1.1.8 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped");
1011 1.1.1.8 mrg }
1012 1.1.1.8 mrg else if ((kind & typemask)
1013 1.1.1.8 mrg == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1014 1.1.1.8 mrg /* If not present, continue using the host address. */
1015 1.1.1.8 mrg ;
1016 1.1.1.8 mrg else
1017 1.1.1.8 mrg __builtin_unreachable ();
1018 1.1.1.8 mrg tgt->list[i].offset = OFFSET_INLINED;
1019 1.1.1.8 mrg }
1020 1.1.1.8 mrg continue;
1021 1.1.1.2 mrg case GOMP_MAP_STRUCT:
1022 1.1.1.2 mrg first = i + 1;
1023 1.1.1.2 mrg last = i + sizes[i];
1024 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1025 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
1026 1.1.1.2 mrg + sizes[last];
1027 1.1.1.2 mrg if (tgt->list[first].key != NULL)
1028 1.1.1.2 mrg continue;
1029 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
1030 1.1.1.2 mrg if (n == NULL)
1031 1.1.1.2 mrg {
1032 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
1033 1.1.1.2 mrg tgt_size -= (uintptr_t) hostaddrs[first]
1034 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
1035 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1036 1.1.1.2 mrg tgt_size += (uintptr_t) hostaddrs[first]
1037 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
1038 1.1.1.2 mrg field_tgt_base = (uintptr_t) hostaddrs[first];
1039 1.1.1.2 mrg field_tgt_offset = tgt_size;
1040 1.1.1.2 mrg field_tgt_clear = last;
1041 1.1.1.2 mrg tgt_size += cur_node.host_end
1042 1.1.1.2 mrg - (uintptr_t) hostaddrs[first];
1043 1.1.1.2 mrg continue;
1044 1.1.1.2 mrg }
1045 1.1.1.2 mrg for (i = first; i <= last; i++)
1046 1.1.1.8 mrg gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1047 1.1.1.6 mrg sizes, kinds, cbufp);
1048 1.1.1.2 mrg i--;
1049 1.1.1.2 mrg continue;
1050 1.1.1.2 mrg case GOMP_MAP_ALWAYS_POINTER:
1051 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1052 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
1053 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
1054 1.1.1.2 mrg if (n == NULL
1055 1.1.1.2 mrg || n->host_start > cur_node.host_start
1056 1.1.1.2 mrg || n->host_end < cur_node.host_end)
1057 1.1.1.2 mrg {
1058 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1059 1.1.1.2 mrg gomp_fatal ("always pointer not mapped");
1060 1.1.1.2 mrg }
1061 1.1.1.2 mrg if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1062 1.1.1.2 mrg != GOMP_MAP_ALWAYS_POINTER)
1063 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1064 1.1.1.2 mrg if (cur_node.tgt_offset)
1065 1.1.1.2 mrg cur_node.tgt_offset -= sizes[i];
1066 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1067 1.1.1.3 mrg (void *) (n->tgt->tgt_start
1068 1.1.1.3 mrg + n->tgt_offset
1069 1.1.1.3 mrg + cur_node.host_start
1070 1.1.1.3 mrg - n->host_start),
1071 1.1.1.3 mrg (void *) &cur_node.tgt_offset,
1072 1.1.1.6 mrg sizeof (void *), cbufp);
1073 1.1.1.2 mrg cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1074 1.1.1.2 mrg + cur_node.host_start - n->host_start;
1075 1.1.1.2 mrg continue;
1076 1.1.1.8 mrg case GOMP_MAP_IF_PRESENT:
1077 1.1.1.8 mrg /* Not present - otherwise handled above. Skip over its
1078 1.1.1.8 mrg MAP_POINTER as well. */
1079 1.1.1.8 mrg if (i + 1 < mapnum
1080 1.1.1.8 mrg && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1081 1.1.1.8 mrg == GOMP_MAP_POINTER))
1082 1.1.1.8 mrg ++i;
1083 1.1.1.8 mrg continue;
1084 1.1.1.8 mrg case GOMP_MAP_ATTACH:
1085 1.1.1.8 mrg {
1086 1.1.1.8 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1087 1.1.1.8 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
1088 1.1.1.8 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1089 1.1.1.8 mrg if (n != NULL)
1090 1.1.1.8 mrg {
1091 1.1.1.8 mrg tgt->list[i].key = n;
1092 1.1.1.8 mrg tgt->list[i].offset = cur_node.host_start - n->host_start;
1093 1.1.1.8 mrg tgt->list[i].length = n->host_end - n->host_start;
1094 1.1.1.8 mrg tgt->list[i].copy_from = false;
1095 1.1.1.8 mrg tgt->list[i].always_copy_from = false;
1096 1.1.1.8 mrg tgt->list[i].is_attach = true;
1097 1.1.1.8 mrg /* OpenACC 'attach'/'detach' doesn't affect
1098 1.1.1.8 mrg structured/dynamic reference counts ('n->refcount',
1099 1.1.1.8 mrg 'n->dynamic_refcount'). */
1100 1.1.1.8 mrg }
1101 1.1.1.8 mrg else
1102 1.1.1.8 mrg {
1103 1.1.1.8 mrg gomp_mutex_unlock (&devicep->lock);
1104 1.1.1.8 mrg gomp_fatal ("outer struct not mapped for attach");
1105 1.1.1.8 mrg }
1106 1.1.1.8 mrg gomp_attach_pointer (devicep, aq, mem_map, n,
1107 1.1.1.8 mrg (uintptr_t) hostaddrs[i], sizes[i],
1108 1.1.1.8 mrg cbufp);
1109 1.1.1.8 mrg continue;
1110 1.1.1.8 mrg }
1111 1.1.1.2 mrg default:
1112 1.1.1.2 mrg break;
1113 1.1.1.2 mrg }
1114 1.1 mrg splay_tree_key k = &array->key;
1115 1.1 mrg k->host_start = (uintptr_t) hostaddrs[i];
1116 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
1117 1.1 mrg k->host_end = k->host_start + sizes[i];
1118 1.1 mrg else
1119 1.1 mrg k->host_end = k->host_start + sizeof (void *);
1120 1.1 mrg splay_tree_key n = splay_tree_lookup (mem_map, k);
1121 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
1122 1.1.1.8 mrg gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1123 1.1.1.6 mrg kind & typemask, cbufp);
1124 1.1 mrg else
1125 1.1 mrg {
1126 1.1.1.8 mrg k->aux = NULL;
1127 1.1.1.2 mrg if (n && n->refcount == REFCOUNT_LINK)
1128 1.1.1.2 mrg {
1129 1.1.1.2 mrg /* Replace target address of the pointer with target address
1130 1.1.1.2 mrg of mapped object in the splay tree. */
1131 1.1.1.2 mrg splay_tree_remove (mem_map, n);
1132 1.1.1.8 mrg k->aux
1133 1.1.1.8 mrg = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1134 1.1.1.8 mrg k->aux->link_key = n;
1135 1.1.1.2 mrg }
1136 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
1137 1.1.1.2 mrg tgt->list[i].key = k;
1138 1.1 mrg k->tgt = tgt;
1139 1.1.1.8 mrg if (field_tgt_clear != FIELD_TGT_EMPTY)
1140 1.1.1.2 mrg {
1141 1.1.1.2 mrg k->tgt_offset = k->host_start - field_tgt_base
1142 1.1.1.2 mrg + field_tgt_offset;
1143 1.1.1.2 mrg if (i == field_tgt_clear)
1144 1.1.1.8 mrg field_tgt_clear = FIELD_TGT_EMPTY;
1145 1.1.1.2 mrg }
1146 1.1.1.2 mrg else
1147 1.1.1.2 mrg {
1148 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1149 1.1.1.2 mrg k->tgt_offset = tgt_size;
1150 1.1.1.2 mrg tgt_size += k->host_end - k->host_start;
1151 1.1.1.2 mrg }
1152 1.1.1.2 mrg tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1153 1.1.1.2 mrg tgt->list[i].always_copy_from
1154 1.1.1.2 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1155 1.1.1.8 mrg tgt->list[i].is_attach = false;
1156 1.1.1.2 mrg tgt->list[i].offset = 0;
1157 1.1.1.2 mrg tgt->list[i].length = k->host_end - k->host_start;
1158 1.1 mrg k->refcount = 1;
1159 1.1.1.7 mrg k->dynamic_refcount = 0;
1160 1.1 mrg tgt->refcount++;
1161 1.1 mrg array->left = NULL;
1162 1.1 mrg array->right = NULL;
1163 1.1 mrg splay_tree_insert (mem_map, array);
1164 1.1 mrg switch (kind & typemask)
1165 1.1 mrg {
1166 1.1 mrg case GOMP_MAP_ALLOC:
1167 1.1 mrg case GOMP_MAP_FROM:
1168 1.1 mrg case GOMP_MAP_FORCE_ALLOC:
1169 1.1 mrg case GOMP_MAP_FORCE_FROM:
1170 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
1171 1.1 mrg break;
1172 1.1 mrg case GOMP_MAP_TO:
1173 1.1 mrg case GOMP_MAP_TOFROM:
1174 1.1 mrg case GOMP_MAP_FORCE_TO:
1175 1.1 mrg case GOMP_MAP_FORCE_TOFROM:
1176 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TO:
1177 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TOFROM:
1178 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1179 1.1.1.3 mrg (void *) (tgt->tgt_start
1180 1.1.1.3 mrg + k->tgt_offset),
1181 1.1.1.3 mrg (void *) k->host_start,
1182 1.1.1.6 mrg k->host_end - k->host_start, cbufp);
1183 1.1 mrg break;
1184 1.1 mrg case GOMP_MAP_POINTER:
1185 1.1.1.8 mrg gomp_map_pointer (tgt, aq,
1186 1.1.1.8 mrg (uintptr_t) *(void **) k->host_start,
1187 1.1.1.6 mrg k->tgt_offset, sizes[i], cbufp);
1188 1.1 mrg break;
1189 1.1 mrg case GOMP_MAP_TO_PSET:
1190 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1191 1.1.1.3 mrg (void *) (tgt->tgt_start
1192 1.1.1.3 mrg + k->tgt_offset),
1193 1.1.1.3 mrg (void *) k->host_start,
1194 1.1.1.6 mrg k->host_end - k->host_start, cbufp);
1195 1.1 mrg
1196 1.1 mrg for (j = i + 1; j < mapnum; j++)
1197 1.1.1.2 mrg if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
1198 1.1.1.2 mrg j)
1199 1.1 mrg & typemask))
1200 1.1 mrg break;
1201 1.1 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start
1202 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1203 1.1 mrg > k->host_end))
1204 1.1 mrg break;
1205 1.1 mrg else
1206 1.1 mrg {
1207 1.1.1.2 mrg tgt->list[j].key = k;
1208 1.1.1.2 mrg tgt->list[j].copy_from = false;
1209 1.1.1.2 mrg tgt->list[j].always_copy_from = false;
1210 1.1.1.8 mrg tgt->list[j].is_attach = false;
1211 1.1.1.2 mrg if (k->refcount != REFCOUNT_INFINITY)
1212 1.1.1.2 mrg k->refcount++;
1213 1.1.1.8 mrg gomp_map_pointer (tgt, aq,
1214 1.1.1.2 mrg (uintptr_t) *(void **) hostaddrs[j],
1215 1.1.1.2 mrg k->tgt_offset
1216 1.1.1.2 mrg + ((uintptr_t) hostaddrs[j]
1217 1.1.1.2 mrg - k->host_start),
1218 1.1.1.6 mrg sizes[j], cbufp);
1219 1.1 mrg i++;
1220 1.1 mrg }
1221 1.1 mrg break;
1222 1.1 mrg case GOMP_MAP_FORCE_PRESENT:
1223 1.1 mrg {
1224 1.1 mrg /* We already looked up the memory region above and it
1225 1.1 mrg was missing. */
1226 1.1 mrg size_t size = k->host_end - k->host_start;
1227 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1228 1.1 mrg #ifdef HAVE_INTTYPES_H
1229 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
1230 1.1 mrg "%"PRIu64" (0x%"PRIx64"))",
1231 1.1 mrg (void *) k->host_start,
1232 1.1 mrg (uint64_t) size, (uint64_t) size);
1233 1.1 mrg #else
1234 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
1235 1.1 mrg "%lu (0x%lx))", (void *) k->host_start,
1236 1.1 mrg (unsigned long) size, (unsigned long) size);
1237 1.1 mrg #endif
1238 1.1 mrg }
1239 1.1 mrg break;
1240 1.1 mrg case GOMP_MAP_FORCE_DEVICEPTR:
1241 1.1 mrg assert (k->host_end - k->host_start == sizeof (void *));
1242 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1243 1.1.1.3 mrg (void *) (tgt->tgt_start
1244 1.1.1.3 mrg + k->tgt_offset),
1245 1.1.1.3 mrg (void *) k->host_start,
1246 1.1.1.6 mrg sizeof (void *), cbufp);
1247 1.1 mrg break;
1248 1.1 mrg default:
1249 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1250 1.1 mrg gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1251 1.1 mrg kind);
1252 1.1 mrg }
1253 1.1.1.2 mrg
1254 1.1.1.8 mrg if (k->aux && k->aux->link_key)
1255 1.1.1.2 mrg {
1256 1.1.1.2 mrg /* Set link pointer on target to the device address of the
1257 1.1.1.2 mrg mapped object. */
1258 1.1.1.2 mrg void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1259 1.1.1.7 mrg /* We intentionally do not use coalescing here, as it's not
1260 1.1.1.7 mrg data allocated by the current call to this function. */
1261 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1262 1.1.1.7 mrg &tgt_addr, sizeof (void *), NULL);
1263 1.1.1.2 mrg }
1264 1.1 mrg array++;
1265 1.1 mrg }
1266 1.1 mrg }
1267 1.1 mrg }
1268 1.1 mrg
1269 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
1270 1.1 mrg {
1271 1.1 mrg for (i = 0; i < mapnum; i++)
1272 1.1 mrg {
1273 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1274 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1275 1.1.1.3 mrg (void *) (tgt->tgt_start + i * sizeof (void *)),
1276 1.1.1.6 mrg (void *) &cur_node.tgt_offset, sizeof (void *),
1277 1.1.1.6 mrg cbufp);
1278 1.1 mrg }
1279 1.1 mrg }
1280 1.1 mrg
1281 1.1.1.6 mrg if (cbufp)
1282 1.1.1.6 mrg {
1283 1.1.1.6 mrg long c = 0;
1284 1.1.1.6 mrg for (c = 0; c < cbuf.chunk_cnt; ++c)
1285 1.1.1.8 mrg gomp_copy_host2dev (devicep, aq,
1286 1.1.1.7 mrg (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1287 1.1.1.7 mrg (char *) cbuf.buf + (cbuf.chunks[c].start
1288 1.1.1.7 mrg - cbuf.chunks[0].start),
1289 1.1.1.7 mrg cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
1290 1.1.1.6 mrg free (cbuf.buf);
1291 1.1.1.7 mrg cbuf.buf = NULL;
1292 1.1.1.7 mrg cbufp = NULL;
1293 1.1.1.6 mrg }
1294 1.1.1.6 mrg
1295 1.1.1.2 mrg /* If the variable from "omp target enter data" map-list was already mapped,
1296 1.1.1.2 mrg tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1297 1.1.1.2 mrg gomp_exit_data. */
1298 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
1299 1.1.1.2 mrg {
1300 1.1.1.2 mrg free (tgt);
1301 1.1.1.2 mrg tgt = NULL;
1302 1.1.1.2 mrg }
1303 1.1.1.2 mrg
1304 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1305 1.1 mrg return tgt;
1306 1.1 mrg }
1307 1.1 mrg
1308 1.1.1.8 mrg attribute_hidden struct target_mem_desc *
1309 1.1.1.8 mrg gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1310 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1311 1.1.1.8 mrg bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
1312 1.1.1.8 mrg {
1313 1.1.1.8 mrg return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1314 1.1.1.8 mrg sizes, kinds, short_mapkind, pragma_kind);
1315 1.1.1.8 mrg }
1316 1.1.1.8 mrg
1317 1.1.1.8 mrg attribute_hidden struct target_mem_desc *
1318 1.1.1.8 mrg gomp_map_vars_async (struct gomp_device_descr *devicep,
1319 1.1.1.8 mrg struct goacc_asyncqueue *aq, size_t mapnum,
1320 1.1.1.8 mrg void **hostaddrs, void **devaddrs, size_t *sizes,
1321 1.1.1.8 mrg void *kinds, bool short_mapkind,
1322 1.1.1.8 mrg enum gomp_map_vars_kind pragma_kind)
1323 1.1.1.8 mrg {
1324 1.1.1.8 mrg return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1325 1.1.1.8 mrg sizes, kinds, short_mapkind, pragma_kind);
1326 1.1.1.8 mrg }
1327 1.1.1.8 mrg
/* Release the device memory and host-side bookkeeping backing TGT.
   Reached once TGT is no longer referenced (see gomp_unref_tgt).  */

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  A zero
     tgt_end indicates that no device block was allocated for this
     descriptor (see the allocation paths in gomp_map_vars_internal);
     to_free is the unaligned allocation base.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  /* free (NULL) is a no-op, so a NULL array needs no guard.  */
  free (tgt->array);
  free (tgt);
}
1338 1.1 mrg
1339 1.1.1.8 mrg static bool
1340 1.1.1.8 mrg gomp_unref_tgt (void *ptr)
1341 1.1.1.7 mrg {
1342 1.1.1.7 mrg bool is_tgt_unmapped = false;
1343 1.1.1.8 mrg
1344 1.1.1.8 mrg struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1345 1.1.1.8 mrg
1346 1.1.1.8 mrg if (tgt->refcount > 1)
1347 1.1.1.8 mrg tgt->refcount--;
1348 1.1.1.7 mrg else
1349 1.1.1.7 mrg {
1350 1.1.1.8 mrg gomp_unmap_tgt (tgt);
1351 1.1.1.7 mrg is_tgt_unmapped = true;
1352 1.1.1.7 mrg }
1353 1.1.1.8 mrg
1354 1.1.1.8 mrg return is_tgt_unmapped;
1355 1.1.1.8 mrg }
1356 1.1.1.8 mrg
/* Adapter for callback interfaces that expect a 'void (*)(void *)'
   function: same as gomp_unref_tgt, result deliberately discarded.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  gomp_unref_tgt (ptr);
}
1362 1.1.1.8 mrg
1363 1.1.1.8 mrg static inline __attribute__((always_inline)) bool
1364 1.1.1.8 mrg gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1365 1.1.1.8 mrg struct goacc_asyncqueue *aq)
1366 1.1.1.8 mrg {
1367 1.1.1.8 mrg bool is_tgt_unmapped = false;
1368 1.1.1.8 mrg splay_tree_remove (&devicep->mem_map, k);
1369 1.1.1.8 mrg if (k->aux)
1370 1.1.1.8 mrg {
1371 1.1.1.8 mrg if (k->aux->link_key)
1372 1.1.1.8 mrg splay_tree_insert (&devicep->mem_map,
1373 1.1.1.8 mrg (splay_tree_node) k->aux->link_key);
1374 1.1.1.8 mrg if (k->aux->attach_count)
1375 1.1.1.8 mrg free (k->aux->attach_count);
1376 1.1.1.8 mrg free (k->aux);
1377 1.1.1.8 mrg k->aux = NULL;
1378 1.1.1.8 mrg }
1379 1.1.1.8 mrg if (aq)
1380 1.1.1.8 mrg devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1381 1.1.1.8 mrg (void *) k->tgt);
1382 1.1.1.8 mrg else
1383 1.1.1.8 mrg is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1384 1.1.1.7 mrg return is_tgt_unmapped;
1385 1.1.1.7 mrg }
1386 1.1.1.7 mrg
1387 1.1.1.8 mrg attribute_hidden bool
1388 1.1.1.8 mrg gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1389 1.1.1.8 mrg {
1390 1.1.1.8 mrg return gomp_remove_var_internal (devicep, k, NULL);
1391 1.1.1.8 mrg }
1392 1.1.1.8 mrg
1393 1.1.1.8 mrg /* Remove a variable asynchronously. This actually removes the variable
1394 1.1.1.8 mrg mapping immediately, but retains the linked target_mem_desc until the
1395 1.1.1.8 mrg asynchronous operation has completed (as it may still refer to target
1396 1.1.1.8 mrg memory). The device lock must be held before entry, and remains locked on
1397 1.1.1.8 mrg exit. */
1398 1.1.1.8 mrg
1399 1.1.1.8 mrg attribute_hidden void
1400 1.1.1.8 mrg gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1401 1.1.1.8 mrg struct goacc_asyncqueue *aq)
1402 1.1.1.8 mrg {
1403 1.1.1.8 mrg (void) gomp_remove_var_internal (devicep, k, aq);
1404 1.1.1.8 mrg }
1405 1.1.1.8 mrg
1406 1.1 mrg /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1407 1.1 mrg variables back from device to host: if it is false, it is assumed that this
1408 1.1.1.3 mrg has been done already. */
1409 1.1 mrg
1410 1.1.1.8 mrg static inline __attribute__((always_inline)) void
1411 1.1.1.8 mrg gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1412 1.1.1.8 mrg struct goacc_asyncqueue *aq)
1413 1.1 mrg {
1414 1.1 mrg struct gomp_device_descr *devicep = tgt->device_descr;
1415 1.1 mrg
1416 1.1 mrg if (tgt->list_count == 0)
1417 1.1 mrg {
1418 1.1 mrg free (tgt);
1419 1.1 mrg return;
1420 1.1 mrg }
1421 1.1 mrg
1422 1.1 mrg gomp_mutex_lock (&devicep->lock);
1423 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
1424 1.1.1.2 mrg {
1425 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1426 1.1.1.2 mrg free (tgt->array);
1427 1.1.1.2 mrg free (tgt);
1428 1.1.1.2 mrg return;
1429 1.1.1.2 mrg }
1430 1.1 mrg
1431 1.1 mrg size_t i;
1432 1.1.1.8 mrg
1433 1.1.1.8 mrg /* We must perform detachments before any copies back to the host. */
1434 1.1.1.8 mrg for (i = 0; i < tgt->list_count; i++)
1435 1.1.1.8 mrg {
1436 1.1.1.8 mrg splay_tree_key k = tgt->list[i].key;
1437 1.1.1.8 mrg
1438 1.1.1.8 mrg if (k != NULL && tgt->list[i].is_attach)
1439 1.1.1.8 mrg gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1440 1.1.1.8 mrg + tgt->list[i].offset,
1441 1.1.1.8 mrg false, NULL);
1442 1.1.1.8 mrg }
1443 1.1.1.8 mrg
1444 1.1 mrg for (i = 0; i < tgt->list_count; i++)
1445 1.1.1.2 mrg {
1446 1.1.1.2 mrg splay_tree_key k = tgt->list[i].key;
1447 1.1.1.2 mrg if (k == NULL)
1448 1.1.1.2 mrg continue;
1449 1.1.1.2 mrg
1450 1.1.1.8 mrg /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1451 1.1.1.8 mrg counts ('n->refcount', 'n->dynamic_refcount'). */
1452 1.1.1.8 mrg if (tgt->list[i].is_attach)
1453 1.1.1.8 mrg continue;
1454 1.1.1.8 mrg
1455 1.1.1.2 mrg bool do_unmap = false;
1456 1.1.1.2 mrg if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1457 1.1.1.2 mrg k->refcount--;
1458 1.1.1.2 mrg else if (k->refcount == 1)
1459 1.1.1.2 mrg {
1460 1.1.1.3 mrg k->refcount--;
1461 1.1.1.3 mrg do_unmap = true;
1462 1.1.1.2 mrg }
1463 1.1.1.2 mrg
1464 1.1.1.2 mrg if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1465 1.1.1.2 mrg || tgt->list[i].always_copy_from)
1466 1.1.1.8 mrg gomp_copy_dev2host (devicep, aq,
1467 1.1.1.3 mrg (void *) (k->host_start + tgt->list[i].offset),
1468 1.1.1.3 mrg (void *) (k->tgt->tgt_start + k->tgt_offset
1469 1.1.1.3 mrg + tgt->list[i].offset),
1470 1.1.1.3 mrg tgt->list[i].length);
1471 1.1.1.2 mrg if (do_unmap)
1472 1.1.1.8 mrg {
1473 1.1.1.8 mrg struct target_mem_desc *k_tgt = k->tgt;
1474 1.1.1.8 mrg bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1475 1.1.1.8 mrg /* It would be bad if TGT got unmapped while we're still iterating
1476 1.1.1.8 mrg over its LIST_COUNT, and also expect to use it in the following
1477 1.1.1.8 mrg code. */
1478 1.1.1.8 mrg assert (!is_tgt_unmapped
1479 1.1.1.8 mrg || k_tgt != tgt);
1480 1.1.1.8 mrg }
1481 1.1.1.2 mrg }
1482 1.1 mrg
1483 1.1.1.8 mrg if (aq)
1484 1.1.1.8 mrg devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1485 1.1.1.8 mrg (void *) tgt);
1486 1.1 mrg else
1487 1.1.1.8 mrg gomp_unref_tgt ((void *) tgt);
1488 1.1 mrg
1489 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1490 1.1 mrg }
1491 1.1 mrg
1492 1.1.1.8 mrg attribute_hidden void
1493 1.1.1.8 mrg gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1494 1.1.1.8 mrg {
1495 1.1.1.8 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
1496 1.1.1.8 mrg }
1497 1.1.1.8 mrg
1498 1.1.1.8 mrg attribute_hidden void
1499 1.1.1.8 mrg gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
1500 1.1.1.8 mrg struct goacc_asyncqueue *aq)
1501 1.1.1.8 mrg {
1502 1.1.1.8 mrg gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
1503 1.1.1.8 mrg }
1504 1.1.1.8 mrg
/* Implement '#pragma omp target update' semantics: for each of the MAPNUM
   entries in HOSTADDRS/SIZES with a non-zero size, copy the data to and/or
   from DEVICEP according to the mapping kind in KINDS.  SHORT_MAPKIND
   selects whether KINDS is an array of unsigned shorts (true) or unsigned
   chars (false).  Entries that are not mapped on the device are silently
   skipped; partially-mapped ranges are a fatal error.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  /* Short kinds carry the map kind in the low 8 bits; char kinds in the
     low 3 bits.  */
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  /* A finalized device has no live mappings; nothing to update.  */
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    /* The requested range must lie entirely within the mapped
	       range.  */
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    /* Device address mirrors the host offset within the mapping.  */
	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
1562 1.1 mrg
1563 1.1 mrg /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1564 1.1 mrg And insert to splay tree the mapping between addresses from HOST_TABLE and
1565 1.1.1.2 mrg from loaded target image. We rely in the host and device compiler
1566 1.1.1.2 mrg emitting variable and functions in the same order. */
1567 1.1 mrg
1568 1.1 mrg static void
1569 1.1.1.2 mrg gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1570 1.1.1.2 mrg const void *host_table, const void *target_data,
1571 1.1.1.2 mrg bool is_register_lock)
1572 1.1 mrg {
1573 1.1 mrg void **host_func_table = ((void ***) host_table)[0];
1574 1.1 mrg void **host_funcs_end = ((void ***) host_table)[1];
1575 1.1 mrg void **host_var_table = ((void ***) host_table)[2];
1576 1.1 mrg void **host_vars_end = ((void ***) host_table)[3];
1577 1.1 mrg
1578 1.1 mrg /* The func table contains only addresses, the var table contains addresses
1579 1.1 mrg and corresponding sizes. */
1580 1.1 mrg int num_funcs = host_funcs_end - host_func_table;
1581 1.1 mrg int num_vars = (host_vars_end - host_var_table) / 2;
1582 1.1 mrg
1583 1.1 mrg /* Load image to device and get target addresses for the image. */
1584 1.1 mrg struct addr_pair *target_table = NULL;
1585 1.1.1.2 mrg int i, num_target_entries;
1586 1.1.1.2 mrg
1587 1.1.1.2 mrg num_target_entries
1588 1.1.1.2 mrg = devicep->load_image_func (devicep->target_id, version,
1589 1.1.1.2 mrg target_data, &target_table);
1590 1.1 mrg
1591 1.1 mrg if (num_target_entries != num_funcs + num_vars)
1592 1.1 mrg {
1593 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1594 1.1 mrg if (is_register_lock)
1595 1.1 mrg gomp_mutex_unlock (®ister_lock);
1596 1.1.1.2 mrg gomp_fatal ("Cannot map target functions or variables"
1597 1.1.1.2 mrg " (expected %u, have %u)", num_funcs + num_vars,
1598 1.1.1.2 mrg num_target_entries);
1599 1.1 mrg }
1600 1.1 mrg
1601 1.1 mrg /* Insert host-target address mapping into splay tree. */
1602 1.1 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1603 1.1 mrg tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1604 1.1.1.2 mrg tgt->refcount = REFCOUNT_INFINITY;
1605 1.1 mrg tgt->tgt_start = 0;
1606 1.1 mrg tgt->tgt_end = 0;
1607 1.1 mrg tgt->to_free = NULL;
1608 1.1 mrg tgt->prev = NULL;
1609 1.1 mrg tgt->list_count = 0;
1610 1.1 mrg tgt->device_descr = devicep;
1611 1.1 mrg splay_tree_node array = tgt->array;
1612 1.1 mrg
1613 1.1 mrg for (i = 0; i < num_funcs; i++)
1614 1.1 mrg {
1615 1.1 mrg splay_tree_key k = &array->key;
1616 1.1 mrg k->host_start = (uintptr_t) host_func_table[i];
1617 1.1 mrg k->host_end = k->host_start + 1;
1618 1.1 mrg k->tgt = tgt;
1619 1.1 mrg k->tgt_offset = target_table[i].start;
1620 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
1621 1.1.1.7 mrg k->dynamic_refcount = 0;
1622 1.1.1.8 mrg k->aux = NULL;
1623 1.1 mrg array->left = NULL;
1624 1.1 mrg array->right = NULL;
1625 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
1626 1.1 mrg array++;
1627 1.1 mrg }
1628 1.1 mrg
1629 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
1630 1.1.1.2 mrg "omp declare target link" variables. */
1631 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1632 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
1633 1.1.1.2 mrg
1634 1.1 mrg for (i = 0; i < num_vars; i++)
1635 1.1 mrg {
1636 1.1 mrg struct addr_pair *target_var = &target_table[num_funcs + i];
1637 1.1.1.2 mrg uintptr_t target_size = target_var->end - target_var->start;
1638 1.1.1.8 mrg bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
1639 1.1.1.2 mrg
1640 1.1.1.8 mrg if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
1641 1.1 mrg {
1642 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1643 1.1 mrg if (is_register_lock)
1644 1.1 mrg gomp_mutex_unlock (®ister_lock);
1645 1.1.1.2 mrg gomp_fatal ("Cannot map target variables (size mismatch)");
1646 1.1 mrg }
1647 1.1 mrg
1648 1.1 mrg splay_tree_key k = &array->key;
1649 1.1 mrg k->host_start = (uintptr_t) host_var_table[i * 2];
1650 1.1.1.2 mrg k->host_end
1651 1.1.1.2 mrg = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1652 1.1 mrg k->tgt = tgt;
1653 1.1 mrg k->tgt_offset = target_var->start;
1654 1.1.1.8 mrg k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1655 1.1.1.7 mrg k->dynamic_refcount = 0;
1656 1.1.1.8 mrg k->aux = NULL;
1657 1.1 mrg array->left = NULL;
1658 1.1 mrg array->right = NULL;
1659 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
1660 1.1 mrg array++;
1661 1.1 mrg }
1662 1.1 mrg
1663 1.1 mrg free (target_table);
1664 1.1 mrg }
1665 1.1 mrg
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  /* HOST_TABLE layout matches gomp_load_image_to_device: func table
     start/end, var table start/end.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array; its tgt descriptor is shared by
     all of the image's entries and may need explicit freeing below.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  /* Link variables go through gomp_remove_var so their refcount
	     machinery is torn down properly.  NOTE(review): the lookup is
	     assumed to succeed here (variable registered at load time) —
	     confirm n cannot be NULL.  */
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  /* If gomp_remove_var did not already free the shared tgt descriptor,
     free it (and its key array, reachable via NODE) now.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
1738 1.1.1.2 mrg
1739 1.1 mrg /* This function should be called from every offload image while loading.
1740 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1741 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
1742 1.1 mrg
1743 1.1 mrg void
1744 1.1.1.2 mrg GOMP_offload_register_ver (unsigned version, const void *host_table,
1745 1.1.1.2 mrg int target_type, const void *target_data)
1746 1.1 mrg {
1747 1.1 mrg int i;
1748 1.1.1.2 mrg
1749 1.1.1.2 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1750 1.1.1.2 mrg gomp_fatal ("Library too old for offload (version %u < %u)",
1751 1.1.1.2 mrg GOMP_VERSION, GOMP_VERSION_LIB (version));
1752 1.1.1.2 mrg
1753 1.1 mrg gomp_mutex_lock (®ister_lock);
1754 1.1 mrg
1755 1.1 mrg /* Load image to all initialized devices. */
1756 1.1 mrg for (i = 0; i < num_devices; i++)
1757 1.1 mrg {
1758 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
1759 1.1 mrg gomp_mutex_lock (&devicep->lock);
1760 1.1.1.2 mrg if (devicep->type == target_type
1761 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
1762 1.1.1.2 mrg gomp_load_image_to_device (devicep, version,
1763 1.1.1.2 mrg host_table, target_data, true);
1764 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1765 1.1 mrg }
1766 1.1 mrg
1767 1.1 mrg /* Insert image to array of pending images. */
1768 1.1 mrg offload_images
1769 1.1 mrg = gomp_realloc_unlock (offload_images,
1770 1.1 mrg (num_offload_images + 1)
1771 1.1 mrg * sizeof (struct offload_image_descr));
1772 1.1.1.2 mrg offload_images[num_offload_images].version = version;
1773 1.1 mrg offload_images[num_offload_images].type = target_type;
1774 1.1 mrg offload_images[num_offload_images].host_table = host_table;
1775 1.1 mrg offload_images[num_offload_images].target_data = target_data;
1776 1.1 mrg
1777 1.1 mrg num_offload_images++;
1778 1.1 mrg gomp_mutex_unlock (®ister_lock);
1779 1.1 mrg }
1780 1.1 mrg
/* Legacy (pre-versioned) entry point for offload image registration;
   forwards with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1787 1.1.1.2 mrg
1788 1.1 mrg /* This function should be called from every offload image while unloading.
1789 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1790 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
1791 1.1 mrg
1792 1.1 mrg void
1793 1.1.1.2 mrg GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1794 1.1.1.2 mrg int target_type, const void *target_data)
1795 1.1 mrg {
1796 1.1 mrg int i;
1797 1.1 mrg
1798 1.1 mrg gomp_mutex_lock (®ister_lock);
1799 1.1 mrg
1800 1.1 mrg /* Unload image from all initialized devices. */
1801 1.1 mrg for (i = 0; i < num_devices; i++)
1802 1.1 mrg {
1803 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
1804 1.1 mrg gomp_mutex_lock (&devicep->lock);
1805 1.1.1.2 mrg if (devicep->type == target_type
1806 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
1807 1.1.1.2 mrg gomp_unload_image_from_device (devicep, version,
1808 1.1.1.2 mrg host_table, target_data);
1809 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1810 1.1 mrg }
1811 1.1 mrg
1812 1.1 mrg /* Remove image from array of pending images. */
1813 1.1 mrg for (i = 0; i < num_offload_images; i++)
1814 1.1 mrg if (offload_images[i].target_data == target_data)
1815 1.1 mrg {
1816 1.1 mrg offload_images[i] = offload_images[--num_offload_images];
1817 1.1 mrg break;
1818 1.1 mrg }
1819 1.1 mrg
1820 1.1 mrg gomp_mutex_unlock (®ister_lock);
1821 1.1 mrg }
1822 1.1 mrg
/* Legacy (pre-versioned) entry point for offload image unregistration;
   forwards with version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1829 1.1.1.2 mrg
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      /* Unlock before gomp_fatal, which does not return.  */
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  /* Only mark the device initialized once images and queues are ready.  */
  devicep->state = GOMP_DEVICE_INITIALIZED;
}
1858 1.1.1.2 mrg
1859 1.1.1.8 mrg /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1860 1.1.1.8 mrg must be locked on entry, and remains locked on return. */
1861 1.1.1.8 mrg
1862 1.1.1.8 mrg attribute_hidden bool
1863 1.1.1.8 mrg gomp_fini_device (struct gomp_device_descr *devicep)
1864 1.1.1.8 mrg {
1865 1.1.1.8 mrg bool ret = goacc_fini_asyncqueues (devicep);
1866 1.1.1.8 mrg ret &= devicep->fini_device_func (devicep->target_id);
1867 1.1.1.8 mrg devicep->state = GOMP_DEVICE_FINALIZED;
1868 1.1.1.8 mrg return ret;
1869 1.1.1.8 mrg }
1870 1.1.1.8 mrg
1871 1.1.1.2 mrg attribute_hidden void
1872 1.1.1.2 mrg gomp_unload_device (struct gomp_device_descr *devicep)
1873 1.1.1.2 mrg {
1874 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
1875 1.1.1.2 mrg {
1876 1.1.1.2 mrg unsigned i;
1877 1.1.1.2 mrg
1878 1.1.1.2 mrg /* Unload from device all images registered at the moment. */
1879 1.1.1.2 mrg for (i = 0; i < num_offload_images; i++)
1880 1.1.1.2 mrg {
1881 1.1.1.2 mrg struct offload_image_descr *image = &offload_images[i];
1882 1.1.1.2 mrg if (image->type == devicep->type)
1883 1.1.1.2 mrg gomp_unload_image_from_device (devicep, image->version,
1884 1.1.1.2 mrg image->host_table,
1885 1.1.1.2 mrg image->target_data);
1886 1.1.1.2 mrg }
1887 1.1.1.2 mrg }
1888 1.1 mrg }
1889 1.1 mrg
/* Host fallback for GOMP_target{,_ext} routines.  Runs FN (HOSTADDRS) on
   the host with freshly zeroed thread state, so the region behaves like an
   initial thread.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  /* Save the caller's thread state and clear it for the duration of FN.  */
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      /* Preserve the place binding so affinity stays consistent.  */
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  /* Release any team state FN created, then restore the saved state.  */
  gomp_free_thread (thr);
  *thr = old_thr;
}
1907 1.1 mrg
1908 1.1.1.2 mrg /* Calculate alignment and size requirements of a private copy of data shared
1909 1.1.1.2 mrg as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1910 1.1.1.2 mrg
1911 1.1.1.2 mrg static inline void
1912 1.1.1.2 mrg calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1913 1.1.1.2 mrg unsigned short *kinds, size_t *tgt_align,
1914 1.1.1.2 mrg size_t *tgt_size)
1915 1.1.1.2 mrg {
1916 1.1.1.2 mrg size_t i;
1917 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
1918 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1919 1.1.1.2 mrg {
1920 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
1921 1.1.1.2 mrg if (*tgt_align < align)
1922 1.1.1.2 mrg *tgt_align = align;
1923 1.1.1.2 mrg *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1924 1.1.1.2 mrg *tgt_size += sizes[i];
1925 1.1.1.2 mrg }
1926 1.1.1.2 mrg }
1927 1.1.1.2 mrg
1928 1.1.1.2 mrg /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1929 1.1.1.2 mrg
1930 1.1.1.2 mrg static inline void
1931 1.1.1.2 mrg copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1932 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, size_t tgt_align,
1933 1.1.1.2 mrg size_t tgt_size)
1934 1.1.1.2 mrg {
1935 1.1.1.2 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1936 1.1.1.2 mrg if (al)
1937 1.1.1.2 mrg tgt += tgt_align - al;
1938 1.1.1.2 mrg tgt_size = 0;
1939 1.1.1.2 mrg size_t i;
1940 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
1941 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1942 1.1.1.2 mrg {
1943 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
1944 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1945 1.1.1.2 mrg memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1946 1.1.1.2 mrg hostaddrs[i] = tgt + tgt_size;
1947 1.1.1.2 mrg tgt_size = tgt_size + sizes[i];
1948 1.1.1.2 mrg }
1949 1.1.1.2 mrg }
1950 1.1.1.2 mrg
1951 1.1.1.2 mrg /* Helper function of GOMP_target{,_ext} routines. */
1952 1.1.1.2 mrg
1953 1.1.1.2 mrg static void *
1954 1.1.1.2 mrg gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1955 1.1.1.2 mrg void (*host_fn) (void *))
1956 1.1.1.2 mrg {
1957 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1958 1.1.1.2 mrg return (void *) host_fn;
1959 1.1.1.2 mrg else
1960 1.1.1.2 mrg {
1961 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
1962 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
1963 1.1.1.2 mrg {
1964 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1965 1.1.1.2 mrg return NULL;
1966 1.1.1.2 mrg }
1967 1.1.1.2 mrg
1968 1.1.1.2 mrg struct splay_tree_key_s k;
1969 1.1.1.2 mrg k.host_start = (uintptr_t) host_fn;
1970 1.1.1.2 mrg k.host_end = k.host_start + 1;
1971 1.1.1.2 mrg splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1972 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1973 1.1.1.2 mrg if (tgt_fn == NULL)
1974 1.1.1.2 mrg return NULL;
1975 1.1.1.2 mrg
1976 1.1.1.2 mrg return (void *) tgt_fn->tgt_offset;
1977 1.1.1.2 mrg }
1978 1.1.1.2 mrg }
1979 1.1.1.2 mrg
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  /* Fall back to host execution if there is no usable device or FN has no
     device counterpart.  Note that fn_addr is assigned inside the
     short-circuit condition, only after the earlier tests have failed.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  /* Map the variables, run the target region synchronously, then unmap
     with copy-out.  */
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
2012 1.1.1.2 mrg
2013 1.1.1.8 mrg static inline unsigned int
2014 1.1.1.8 mrg clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2015 1.1.1.8 mrg {
2016 1.1.1.8 mrg /* If we cannot run asynchronously, simply ignore nowait. */
2017 1.1.1.8 mrg if (devicep != NULL && devicep->async_run_func == NULL)
2018 1.1.1.8 mrg flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2019 1.1.1.8 mrg
2020 1.1.1.8 mrg return flags;
2021 1.1.1.8 mrg }
2022 1.1.1.8 mrg
2023 1.1.1.2 mrg /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2024 1.1.1.2 mrg and several arguments have been added:
2025 1.1.1.2 mrg FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2026 1.1.1.2 mrg DEPEND is array of dependencies, see GOMP_task for details.
2027 1.1.1.2 mrg
2028 1.1.1.2 mrg ARGS is a pointer to an array consisting of a variable number of both
2029 1.1.1.2 mrg device-independent and device-specific arguments, which can take one two
2030 1.1.1.2 mrg elements where the first specifies for which device it is intended, the type
2031 1.1.1.2 mrg and optionally also the value. If the value is not present in the first
2032 1.1.1.2 mrg one, the whole second element the actual value. The last element of the
2033 1.1.1.2 mrg array is a single NULL. Among the device independent can be for example
2034 1.1.1.2 mrg NUM_TEAMS and THREAD_LIMIT.
2035 1.1.1.2 mrg
2036 1.1.1.2 mrg NUM_TEAMS is positive if GOMP_teams will be called in the body with
2037 1.1.1.2 mrg that value, or 1 if teams construct is not present, or 0, if
2038 1.1.1.2 mrg teams construct does not have num_teams clause and so the choice is
2039 1.1.1.2 mrg implementation defined, and -1 if it can't be determined on the host
2040 1.1.1.2 mrg what value will GOMP_teams have on the device.
2041 1.1.1.2 mrg THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2042 1.1.1.2 mrg body with that value, or 0, if teams construct does not have thread_limit
2043 1.1.1.2 mrg clause or the teams construct is not present, or -1 if it can't be
2044 1.1.1.2 mrg determined on the host what value will GOMP_teams have on the device. */
2045 1.1.1.2 mrg
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Tracks whether firstprivate copies have already been made, so the
     later paths do not redo the work.  */
  bool fpc_done = false;

  /* Drop flags (e.g. nowait) that the selected device cannot support.  */
  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task **implicit_task = &task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  /* Walk up the task chain to find the nearest enclosing implicit
	     task, which is replaced by the new team's implicit task.  */
	  while (*implicit_task
		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
	    implicit_task = &(*implicit_task)->parent;
	  if (*implicit_task)
	    {
	      thr->task = *implicit_task;
	      gomp_end_task ();
	      free (*implicit_task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	  if (implicit_task != &task)
	    {
	      /* The implicit task was found further up the chain: splice
		 the new implicit task in its place and restore the
		 original current task.  */
	      *implicit_task = thr->task;
	      thr->task = task;
	    }
	}
      /* With a team available (and not inside a final task), run the
	 target region as an asynchronous target task.  */
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  /* Host fallback when there is no usable device, no device-side FN, or
     the device declines to run it.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory devices see host addresses directly; only
	 firstprivate data needs private copies.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
2177 1.1 mrg
2178 1.1.1.2 mrg /* Host fallback for GOMP_target_data{,_ext} routines. */
2179 1.1.1.2 mrg
2180 1.1.1.2 mrg static void
2181 1.1.1.2 mrg gomp_target_data_fallback (void)
2182 1.1.1.2 mrg {
2183 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (false);
2184 1.1.1.2 mrg if (icv->target_data)
2185 1.1 mrg {
2186 1.1.1.2 mrg /* Even when doing a host fallback, if there are any active
2187 1.1.1.2 mrg #pragma omp target data constructs, need to remember the
2188 1.1.1.2 mrg new #pragma omp target data, otherwise GOMP_target_end_data
2189 1.1.1.2 mrg would get out of sync. */
2190 1.1.1.2 mrg struct target_mem_desc *tgt
2191 1.1.1.2 mrg = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2192 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
2193 1.1.1.2 mrg tgt->prev = icv->target_data;
2194 1.1.1.2 mrg icv->target_data = tgt;
2195 1.1 mrg }
2196 1.1 mrg }
2197 1.1 mrg
2198 1.1 mrg void
2199 1.1 mrg GOMP_target_data (int device, const void *unused, size_t mapnum,
2200 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
2201 1.1 mrg {
2202 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
2203 1.1 mrg
2204 1.1 mrg if (devicep == NULL
2205 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2206 1.1.1.2 mrg || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2207 1.1.1.2 mrg return gomp_target_data_fallback ();
2208 1.1 mrg
2209 1.1 mrg struct target_mem_desc *tgt
2210 1.1 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2211 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
2212 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (true);
2213 1.1.1.2 mrg tgt->prev = icv->target_data;
2214 1.1.1.2 mrg icv->target_data = tgt;
2215 1.1.1.2 mrg }
2216 1.1.1.2 mrg
2217 1.1.1.2 mrg void
2218 1.1.1.2 mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2219 1.1.1.2 mrg size_t *sizes, unsigned short *kinds)
2220 1.1.1.2 mrg {
2221 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
2222 1.1.1.2 mrg
2223 1.1.1.2 mrg if (devicep == NULL
2224 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2225 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2226 1.1.1.2 mrg return gomp_target_data_fallback ();
2227 1.1.1.2 mrg
2228 1.1.1.2 mrg struct target_mem_desc *tgt
2229 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2230 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
2231 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
2232 1.1 mrg tgt->prev = icv->target_data;
2233 1.1 mrg icv->target_data = tgt;
2234 1.1 mrg }
2235 1.1 mrg
2236 1.1 mrg void
2237 1.1 mrg GOMP_target_end_data (void)
2238 1.1 mrg {
2239 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
2240 1.1 mrg if (icv->target_data)
2241 1.1 mrg {
2242 1.1 mrg struct target_mem_desc *tgt = icv->target_data;
2243 1.1 mrg icv->target_data = tgt->prev;
2244 1.1 mrg gomp_unmap_vars (tgt, true);
2245 1.1 mrg }
2246 1.1 mrg }
2247 1.1 mrg
2248 1.1 mrg void
2249 1.1 mrg GOMP_target_update (int device, const void *unused, size_t mapnum,
2250 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
2251 1.1 mrg {
2252 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
2253 1.1 mrg
2254 1.1 mrg if (devicep == NULL
2255 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2256 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2257 1.1.1.2 mrg return;
2258 1.1.1.2 mrg
2259 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2260 1.1.1.2 mrg }
2261 1.1.1.2 mrg
/* Handle #pragma omp target update (GOMP 4.5 entry point).  FLAGS carries
   GOMP_TARGET_FLAG_* bits (notably NOWAIT) and DEPEND the dependence
   vector from depend clauses, or NULL if there are none.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Deferred path: queue a GOMP_TARGET_TASK_DATA task that
		 performs the update once its dependencies are satisfied.
		 If task creation fails, fall through to the merged path.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      /* A cancelled workshare taskgroup cancels its
			 enclosing taskgroup's tasks as well.  */
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to update when offloading is impossible or the device shares
     host memory.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
2340 1.1.1.2 mrg
/* Implement the "exit data" half of #pragma omp target enter/exit data:
   for each mapping, decrement (or force to zero for delete kinds) the
   reference count of the corresponding device mapping, copy data back to
   the host where the map kind requires it, and remove mappings whose
   refcount dropped to zero.  Runs with DEVICEP's lock held.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  /* Low byte of each kinds[] entry is the map kind proper.  */
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length sections need the special lookup that also
	     matches zero-length mappings at the same address.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  /* Not mapped on this device; nothing to release.  */
	  if (!k)
	    continue;

	  /* REFCOUNT_INFINITY marks mappings (e.g. from
	     omp_target_associate_ptr) that are never unmapped here.  */
	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* The delete kinds drop the mapping regardless of how many
	     references remain.  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* Copy back: "from" only on last release, "always from"
	     unconditionally.  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    gomp_remove_var (devicep, k);

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
2402 1.1 mrg
/* Handle #pragma omp target enter data / exit data.  FLAGS distinguishes
   the two directions (GOMP_TARGET_FLAG_EXIT_DATA) and carries NOWAIT;
   DEPEND is the dependence vector from depend clauses, or NULL.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Deferred path: queue a GOMP_TARGET_TASK_DATA task; if
		 that fails, fall through to the merged path below.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to map when offloading is impossible or the device shares
     host memory.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  /* A struct mapping covers sizes[i] following entries; map them
	     together and skip past them.  */
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  /* A pointer set is followed by its GOMP_MAP_POINTER entries;
	     map the whole group in one call.  */
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
2505 1.1.1.2 mrg
/* Task function for deferred target operations (created by
   gomp_create_target_task).  DATA is the struct gomp_target_task.
   Returns true if the task remains pending on an asynchronous device
   operation, false when it is complete.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  /* A non-NULL fn means this is a deferred "target" region; otherwise it
     is a deferred update / enter data / exit data operation.  */
  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* Cannot offload: run the region on the host synchronously.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      /* Second invocation, after the async device run finished: just
	 unmap the variables.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  /* Shared memory: pass host addresses directly, no mapping.  */
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  /* The arguments block sits at the start of the target area.  */
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* Kick off the asynchronous device execution; the plugin signals
	 completion back through ttask.  */
      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Deferred enter data: map each variable separately, mirroring
       GOMP_target_enter_exit_data.  */
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
2578 1.1 mrg
2579 1.1 mrg void
2580 1.1 mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2581 1.1 mrg {
2582 1.1 mrg if (thread_limit)
2583 1.1 mrg {
2584 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
2585 1.1 mrg icv->thread_limit_var
2586 1.1 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2587 1.1 mrg }
2588 1.1 mrg (void) num_teams;
2589 1.1 mrg }
2590 1.1 mrg
2591 1.1.1.2 mrg void *
2592 1.1.1.2 mrg omp_target_alloc (size_t size, int device_num)
2593 1.1.1.2 mrg {
2594 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2595 1.1.1.2 mrg return malloc (size);
2596 1.1.1.2 mrg
2597 1.1.1.2 mrg if (device_num < 0)
2598 1.1.1.2 mrg return NULL;
2599 1.1.1.2 mrg
2600 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2601 1.1.1.2 mrg if (devicep == NULL)
2602 1.1.1.2 mrg return NULL;
2603 1.1.1.2 mrg
2604 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2605 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2606 1.1.1.2 mrg return malloc (size);
2607 1.1.1.2 mrg
2608 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2609 1.1.1.2 mrg void *ret = devicep->alloc_func (devicep->target_id, size);
2610 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2611 1.1.1.2 mrg return ret;
2612 1.1.1.2 mrg }
2613 1.1.1.2 mrg
2614 1.1.1.2 mrg void
2615 1.1.1.2 mrg omp_target_free (void *device_ptr, int device_num)
2616 1.1.1.2 mrg {
2617 1.1.1.2 mrg if (device_ptr == NULL)
2618 1.1.1.2 mrg return;
2619 1.1.1.2 mrg
2620 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2621 1.1.1.2 mrg {
2622 1.1.1.2 mrg free (device_ptr);
2623 1.1.1.2 mrg return;
2624 1.1.1.2 mrg }
2625 1.1.1.2 mrg
2626 1.1.1.2 mrg if (device_num < 0)
2627 1.1.1.2 mrg return;
2628 1.1.1.2 mrg
2629 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2630 1.1.1.2 mrg if (devicep == NULL)
2631 1.1.1.2 mrg return;
2632 1.1.1.2 mrg
2633 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2634 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2635 1.1.1.2 mrg {
2636 1.1.1.2 mrg free (device_ptr);
2637 1.1.1.2 mrg return;
2638 1.1.1.2 mrg }
2639 1.1.1.2 mrg
2640 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2641 1.1.1.3 mrg gomp_free_device_memory (devicep, device_ptr);
2642 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2643 1.1.1.2 mrg }
2644 1.1.1.2 mrg
2645 1.1.1.2 mrg int
2646 1.1.1.7 mrg omp_target_is_present (const void *ptr, int device_num)
2647 1.1.1.2 mrg {
2648 1.1.1.2 mrg if (ptr == NULL)
2649 1.1.1.2 mrg return 1;
2650 1.1.1.2 mrg
2651 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2652 1.1.1.2 mrg return 1;
2653 1.1.1.2 mrg
2654 1.1.1.2 mrg if (device_num < 0)
2655 1.1.1.2 mrg return 0;
2656 1.1.1.2 mrg
2657 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2658 1.1.1.2 mrg if (devicep == NULL)
2659 1.1.1.2 mrg return 0;
2660 1.1.1.2 mrg
2661 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2662 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2663 1.1.1.2 mrg return 1;
2664 1.1.1.2 mrg
2665 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2666 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
2667 1.1.1.2 mrg struct splay_tree_key_s cur_node;
2668 1.1.1.2 mrg
2669 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
2670 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
2671 1.1.1.2 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2672 1.1.1.2 mrg int ret = n != NULL;
2673 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2674 1.1.1.2 mrg return ret;
2675 1.1.1.2 mrg }
2676 1.1.1.2 mrg
2677 1.1.1.2 mrg int
2678 1.1.1.7 mrg omp_target_memcpy (void *dst, const void *src, size_t length,
2679 1.1.1.7 mrg size_t dst_offset, size_t src_offset, int dst_device_num,
2680 1.1.1.7 mrg int src_device_num)
2681 1.1.1.2 mrg {
2682 1.1.1.2 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2683 1.1.1.3 mrg bool ret;
2684 1.1.1.2 mrg
2685 1.1.1.2 mrg if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2686 1.1.1.2 mrg {
2687 1.1.1.2 mrg if (dst_device_num < 0)
2688 1.1.1.2 mrg return EINVAL;
2689 1.1.1.2 mrg
2690 1.1.1.2 mrg dst_devicep = resolve_device (dst_device_num);
2691 1.1.1.2 mrg if (dst_devicep == NULL)
2692 1.1.1.2 mrg return EINVAL;
2693 1.1.1.2 mrg
2694 1.1.1.2 mrg if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2695 1.1.1.2 mrg || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2696 1.1.1.2 mrg dst_devicep = NULL;
2697 1.1.1.2 mrg }
2698 1.1.1.2 mrg if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2699 1.1.1.2 mrg {
2700 1.1.1.2 mrg if (src_device_num < 0)
2701 1.1.1.2 mrg return EINVAL;
2702 1.1.1.2 mrg
2703 1.1.1.2 mrg src_devicep = resolve_device (src_device_num);
2704 1.1.1.2 mrg if (src_devicep == NULL)
2705 1.1.1.2 mrg return EINVAL;
2706 1.1.1.2 mrg
2707 1.1.1.2 mrg if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2708 1.1.1.2 mrg || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2709 1.1.1.2 mrg src_devicep = NULL;
2710 1.1.1.2 mrg }
2711 1.1.1.2 mrg if (src_devicep == NULL && dst_devicep == NULL)
2712 1.1.1.2 mrg {
2713 1.1.1.2 mrg memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2714 1.1.1.2 mrg return 0;
2715 1.1.1.2 mrg }
2716 1.1.1.2 mrg if (src_devicep == NULL)
2717 1.1.1.2 mrg {
2718 1.1.1.2 mrg gomp_mutex_lock (&dst_devicep->lock);
2719 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2720 1.1.1.3 mrg (char *) dst + dst_offset,
2721 1.1.1.3 mrg (char *) src + src_offset, length);
2722 1.1.1.2 mrg gomp_mutex_unlock (&dst_devicep->lock);
2723 1.1.1.3 mrg return (ret ? 0 : EINVAL);
2724 1.1.1.2 mrg }
2725 1.1.1.2 mrg if (dst_devicep == NULL)
2726 1.1.1.2 mrg {
2727 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
2728 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id,
2729 1.1.1.3 mrg (char *) dst + dst_offset,
2730 1.1.1.3 mrg (char *) src + src_offset, length);
2731 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
2732 1.1.1.3 mrg return (ret ? 0 : EINVAL);
2733 1.1.1.2 mrg }
2734 1.1.1.2 mrg if (src_devicep == dst_devicep)
2735 1.1.1.2 mrg {
2736 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
2737 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id,
2738 1.1.1.3 mrg (char *) dst + dst_offset,
2739 1.1.1.3 mrg (char *) src + src_offset, length);
2740 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
2741 1.1.1.3 mrg return (ret ? 0 : EINVAL);
2742 1.1.1.2 mrg }
2743 1.1.1.2 mrg return EINVAL;
2744 1.1.1.2 mrg }
2745 1.1.1.2 mrg
/* Recursive worker for omp_target_memcpy_rect.  Copies a NUM_DIMS-
   dimensional rectangular subvolume (VOLUME elements per dimension,
   at DST_OFFSETS/SRC_OFFSETS into arrays of DST_DIMENSIONS/
   SRC_DIMENSIONS extents, ELEMENT_SIZE bytes per element) between DST
   and SRC.  A NULL device descriptor means the corresponding side is
   host memory.  Returns 0 on success, EINVAL on overflow or an
   unsupported device combination.  Caller holds the device lock.  */

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  /* Base case: a 1-D row is a single contiguous copy.  */
  if (num_dims == 1)
    {
      /* Guard all byte-count computations against size_t overflow.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	/* Distinct source and destination devices: unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Byte size of one slice along the outermost dimension (product of
     all inner extents times the element size), overflow-checked.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over each slice of the outermost dimension.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
2821 1.1.1.2 mrg
2822 1.1.1.2 mrg int
2823 1.1.1.7 mrg omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2824 1.1.1.2 mrg int num_dims, const size_t *volume,
2825 1.1.1.2 mrg const size_t *dst_offsets,
2826 1.1.1.2 mrg const size_t *src_offsets,
2827 1.1.1.2 mrg const size_t *dst_dimensions,
2828 1.1.1.2 mrg const size_t *src_dimensions,
2829 1.1.1.2 mrg int dst_device_num, int src_device_num)
2830 1.1.1.2 mrg {
2831 1.1.1.2 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2832 1.1.1.2 mrg
2833 1.1.1.2 mrg if (!dst && !src)
2834 1.1.1.2 mrg return INT_MAX;
2835 1.1.1.2 mrg
2836 1.1.1.2 mrg if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2837 1.1.1.2 mrg {
2838 1.1.1.2 mrg if (dst_device_num < 0)
2839 1.1.1.2 mrg return EINVAL;
2840 1.1.1.2 mrg
2841 1.1.1.2 mrg dst_devicep = resolve_device (dst_device_num);
2842 1.1.1.2 mrg if (dst_devicep == NULL)
2843 1.1.1.2 mrg return EINVAL;
2844 1.1.1.2 mrg
2845 1.1.1.2 mrg if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2846 1.1.1.2 mrg || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2847 1.1.1.2 mrg dst_devicep = NULL;
2848 1.1.1.2 mrg }
2849 1.1.1.2 mrg if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2850 1.1.1.2 mrg {
2851 1.1.1.2 mrg if (src_device_num < 0)
2852 1.1.1.2 mrg return EINVAL;
2853 1.1.1.2 mrg
2854 1.1.1.2 mrg src_devicep = resolve_device (src_device_num);
2855 1.1.1.2 mrg if (src_devicep == NULL)
2856 1.1.1.2 mrg return EINVAL;
2857 1.1.1.2 mrg
2858 1.1.1.2 mrg if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2859 1.1.1.2 mrg || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2860 1.1.1.2 mrg src_devicep = NULL;
2861 1.1.1.2 mrg }
2862 1.1.1.2 mrg
2863 1.1.1.2 mrg if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2864 1.1.1.2 mrg return EINVAL;
2865 1.1.1.2 mrg
2866 1.1.1.2 mrg if (src_devicep)
2867 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
2868 1.1.1.2 mrg else if (dst_devicep)
2869 1.1.1.2 mrg gomp_mutex_lock (&dst_devicep->lock);
2870 1.1.1.2 mrg int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2871 1.1.1.2 mrg volume, dst_offsets, src_offsets,
2872 1.1.1.2 mrg dst_dimensions, src_dimensions,
2873 1.1.1.2 mrg dst_devicep, src_devicep);
2874 1.1.1.2 mrg if (src_devicep)
2875 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
2876 1.1.1.2 mrg else if (dst_devicep)
2877 1.1.1.2 mrg gomp_mutex_unlock (&dst_devicep->lock);
2878 1.1.1.2 mrg return ret;
2879 1.1.1.2 mrg }
2880 1.1.1.2 mrg
/* omp_target_associate_ptr -- associate HOST_PTR..HOST_PTR+SIZE with
   device memory DEVICE_PTR+DEVICE_OFFSET on device DEVICE_NUM, so later
   map clauses reuse that device storage instead of allocating.  Returns
   0 on success, EINVAL on failure (including an existing conflicting
   mapping).  */

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  /* The host device cannot have associated device storage.  */
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* Already mapped: succeed only if the existing mapping covers the
	 range and points at the very same device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Build a minimal target_mem_desc that owns no device memory
	 (tgt_start == tgt_end == 0, to_free == NULL): the device
	 storage belongs to the user, not to libgomp.  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* REFCOUNT_INFINITY keeps exit-data constructs from unmapping an
	 association; only omp_target_disassociate_ptr removes it.  */
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2944 1.1.1.2 mrg
/* omp_target_disassociate_ptr -- undo an omp_target_associate_ptr for
   PTR on device DEVICE_NUM.  Returns 0 on success, EINVAL if no such
   association exists (or the mapping was not created by association).  */

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  /* Only remove mappings that look exactly like those created by
     omp_target_associate_ptr: infinite refcount and a descriptor that
     owns no device memory and is not referenced elsewhere.  */
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2986 1.1.1.2 mrg
2987 1.1.1.7 mrg int
2988 1.1.1.7 mrg omp_pause_resource (omp_pause_resource_t kind, int device_num)
2989 1.1.1.7 mrg {
2990 1.1.1.7 mrg (void) kind;
2991 1.1.1.7 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2992 1.1.1.7 mrg return gomp_pause_host ();
2993 1.1.1.7 mrg if (device_num < 0 || device_num >= gomp_get_num_devices ())
2994 1.1.1.7 mrg return -1;
2995 1.1.1.7 mrg /* Do nothing for target devices for now. */
2996 1.1.1.7 mrg return 0;
2997 1.1.1.7 mrg }
2998 1.1.1.7 mrg
2999 1.1.1.7 mrg int
3000 1.1.1.7 mrg omp_pause_resource_all (omp_pause_resource_t kind)
3001 1.1.1.7 mrg {
3002 1.1.1.7 mrg (void) kind;
3003 1.1.1.7 mrg if (gomp_pause_host ())
3004 1.1.1.7 mrg return -1;
3005 1.1.1.7 mrg /* Do nothing for target devices for now. */
3006 1.1.1.7 mrg return 0;
3007 1.1.1.7 mrg }
3008 1.1.1.7 mrg
3009 1.1.1.7 mrg ialias (omp_pause_resource)
3010 1.1.1.7 mrg ialias (omp_pause_resource_all)
3011 1.1.1.7 mrg
3012 1.1 mrg #ifdef PLUGIN_SUPPORT
3013 1.1 mrg
3014 1.1 mrg /* This function tries to load a plugin for DEVICE. Name of plugin is passed
3015 1.1 mrg in PLUGIN_NAME.
3016 1.1 mrg The handles of the found functions are stored in the corresponding fields
3017 1.1 mrg of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3018 1.1 mrg
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  /* ERR carries a static message for the "fail" path; LAST_MISSING
     records the most recent optional symbol that was not found, for
     diagnostics.  */
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* The version check comes first so that a stale plugin is rejected
     before any other symbol is consulted.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Entry points every plugin must provide, regardless of which
     offloading capabilities it advertises.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  /* Capability-specific entry points are only required when the plugin
     claims the corresponding capability.  */
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA interop hooks are all-or-nothing: count how many
	 resolved and insist on either none or all four of them.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  /* dlerror reports the reason for the most recent dlopen/dlsym
     failure.  */
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
3123 1.1.1.2 mrg
3124 1.1.1.2 mrg /* This function finalizes all initialized devices. */
3125 1.1.1.2 mrg
3126 1.1.1.2 mrg static void
3127 1.1.1.2 mrg gomp_target_fini (void)
3128 1.1.1.2 mrg {
3129 1.1.1.2 mrg int i;
3130 1.1.1.2 mrg for (i = 0; i < num_devices; i++)
3131 1.1 mrg {
3132 1.1.1.3 mrg bool ret = true;
3133 1.1.1.2 mrg struct gomp_device_descr *devicep = &devices[i];
3134 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
3135 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
3136 1.1.1.8 mrg ret = gomp_fini_device (devicep);
3137 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
3138 1.1.1.3 mrg if (!ret)
3139 1.1.1.3 mrg gomp_fatal ("device finalization failed");
3140 1.1 mrg }
3141 1.1 mrg }
3142 1.1 mrg
3143 1.1.1.7 mrg /* This function initializes the runtime for offloading.
3144 1.1.1.7 mrg It parses the list of offload plugins, and tries to load these.
3145 1.1.1.7 mrg On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3146 1.1 mrg will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */
3149 1.1 mrg
3150 1.1 mrg static void
3151 1.1 mrg gomp_target_init (void)
3152 1.1 mrg {
3153 1.1 mrg const char *prefix ="libgomp-plugin-";
3154 1.1 mrg const char *suffix = SONAME_SUFFIX (1);
3155 1.1 mrg const char *cur, *next;
3156 1.1 mrg char *plugin_name;
3157 1.1 mrg int i, new_num_devices;
3158 1.1 mrg
3159 1.1 mrg num_devices = 0;
3160 1.1 mrg devices = NULL;
3161 1.1 mrg
3162 1.1.1.7 mrg cur = OFFLOAD_PLUGINS;
3163 1.1 mrg if (*cur)
3164 1.1 mrg do
3165 1.1 mrg {
3166 1.1 mrg struct gomp_device_descr current_device;
3167 1.1.1.6 mrg size_t prefix_len, suffix_len, cur_len;
3168 1.1 mrg
3169 1.1 mrg next = strchr (cur, ',');
3170 1.1 mrg
3171 1.1.1.6 mrg prefix_len = strlen (prefix);
3172 1.1.1.6 mrg cur_len = next ? next - cur : strlen (cur);
3173 1.1.1.6 mrg suffix_len = strlen (suffix);
3174 1.1.1.6 mrg
3175 1.1.1.6 mrg plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3176 1.1 mrg if (!plugin_name)
3177 1.1 mrg {
3178 1.1 mrg num_devices = 0;
3179 1.1 mrg break;
3180 1.1 mrg }
3181 1.1 mrg
3182 1.1.1.6 mrg memcpy (plugin_name, prefix, prefix_len);
3183 1.1.1.6 mrg memcpy (plugin_name + prefix_len, cur, cur_len);
3184 1.1.1.6 mrg memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3185 1.1 mrg
3186 1.1 mrg if (gomp_load_plugin_for_device (¤t_device, plugin_name))
3187 1.1 mrg {
3188 1.1 mrg new_num_devices = current_device.get_num_devices_func ();
3189 1.1 mrg if (new_num_devices >= 1)
3190 1.1 mrg {
3191 1.1 mrg /* Augment DEVICES and NUM_DEVICES. */
3192 1.1 mrg
3193 1.1 mrg devices = realloc (devices, (num_devices + new_num_devices)
3194 1.1 mrg * sizeof (struct gomp_device_descr));
3195 1.1 mrg if (!devices)
3196 1.1 mrg {
3197 1.1 mrg num_devices = 0;
3198 1.1 mrg free (plugin_name);
3199 1.1 mrg break;
3200 1.1 mrg }
3201 1.1 mrg
3202 1.1 mrg current_device.name = current_device.get_name_func ();
3203 1.1 mrg /* current_device.capabilities has already been set. */
3204 1.1 mrg current_device.type = current_device.get_type_func ();
3205 1.1 mrg current_device.mem_map.root = NULL;
3206 1.1.1.2 mrg current_device.state = GOMP_DEVICE_UNINITIALIZED;
3207 1.1 mrg for (i = 0; i < new_num_devices; i++)
3208 1.1 mrg {
3209 1.1 mrg current_device.target_id = i;
3210 1.1 mrg devices[num_devices] = current_device;
3211 1.1 mrg gomp_mutex_init (&devices[num_devices].lock);
3212 1.1 mrg num_devices++;
3213 1.1 mrg }
3214 1.1 mrg }
3215 1.1 mrg }
3216 1.1 mrg
3217 1.1 mrg free (plugin_name);
3218 1.1 mrg cur = next + 1;
3219 1.1 mrg }
3220 1.1 mrg while (next);
3221 1.1 mrg
3222 1.1 mrg /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3223 1.1 mrg NUM_DEVICES_OPENMP. */
3224 1.1 mrg struct gomp_device_descr *devices_s
3225 1.1 mrg = malloc (num_devices * sizeof (struct gomp_device_descr));
3226 1.1 mrg if (!devices_s)
3227 1.1 mrg {
3228 1.1 mrg num_devices = 0;
3229 1.1 mrg free (devices);
3230 1.1 mrg devices = NULL;
3231 1.1 mrg }
3232 1.1 mrg num_devices_openmp = 0;
3233 1.1 mrg for (i = 0; i < num_devices; i++)
3234 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3235 1.1 mrg devices_s[num_devices_openmp++] = devices[i];
3236 1.1 mrg int num_devices_after_openmp = num_devices_openmp;
3237 1.1 mrg for (i = 0; i < num_devices; i++)
3238 1.1 mrg if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3239 1.1 mrg devices_s[num_devices_after_openmp++] = devices[i];
3240 1.1 mrg free (devices);
3241 1.1 mrg devices = devices_s;
3242 1.1 mrg
3243 1.1 mrg for (i = 0; i < num_devices; i++)
3244 1.1 mrg {
3245 1.1 mrg /* The 'devices' array can be moved (by the realloc call) until we have
3246 1.1 mrg found all the plugins, so registering with the OpenACC runtime (which
3247 1.1 mrg takes a copy of the pointer argument) must be delayed until now. */
3248 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3249 1.1 mrg goacc_register (&devices[i]);
3250 1.1 mrg }
3251 1.1.1.2 mrg
3252 1.1.1.2 mrg if (atexit (gomp_target_fini) != 0)
3253 1.1.1.2 mrg gomp_fatal ("atexit failed");
3254 1.1 mrg }
3255 1.1 mrg
3256 1.1 mrg #else /* PLUGIN_SUPPORT */
3257 1.1 mrg /* If dlfcn.h is unavailable we always fallback to host execution.
3258 1.1 mrg GOMP_target* routines are just stubs for this case. */
static void
gomp_target_init (void)
{
  /* Stub: without dlfcn.h there are no offload plugins to load, so
     there is nothing to initialize here.  */
}
3263 1.1 mrg #endif /* PLUGIN_SUPPORT */
3264