target.c revision 1.1.1.7 1 1.1.1.7 mrg /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2 1.1 mrg Contributed by Jakub Jelinek <jakub (at) redhat.com>.
3 1.1 mrg
4 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.1 mrg (libgomp).
6 1.1 mrg
7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1 mrg more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg /* This file contains the support of offloading. */
27 1.1 mrg
28 1.1 mrg #include "config.h"
29 1.1 mrg #include "libgomp.h"
30 1.1 mrg #include "oacc-plugin.h"
31 1.1 mrg #include "oacc-int.h"
32 1.1 mrg #include "gomp-constants.h"
33 1.1 mrg #include <limits.h>
34 1.1 mrg #include <stdbool.h>
35 1.1 mrg #include <stdlib.h>
36 1.1 mrg #ifdef HAVE_INTTYPES_H
37 1.1 mrg # include <inttypes.h> /* For PRIu64. */
38 1.1 mrg #endif
39 1.1 mrg #include <string.h>
40 1.1 mrg #include <assert.h>
41 1.1.1.2 mrg #include <errno.h>
42 1.1 mrg
43 1.1 mrg #ifdef PLUGIN_SUPPORT
44 1.1 mrg #include <dlfcn.h>
45 1.1 mrg #include "plugin-suffix.h"
46 1.1 mrg #endif
47 1.1 mrg
48 1.1 mrg static void gomp_target_init (void);
49 1.1 mrg
/* The whole initialization code for offloading plugins is only run once.  */
51 1.1 mrg static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 1.1 mrg
53 1.1 mrg /* Mutex for offload image registration. */
54 1.1 mrg static gomp_mutex_t register_lock;
55 1.1 mrg
/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  /* Image interface version — presumably the GOMP offload/plugin version
     passed by the registering caller; confirm against the registration
     entry points (not visible in this chunk).  */
  unsigned version;
  /* Type of the target device this image is for.  */
  enum offload_target_type type;
  /* Host-side table descriptor for the image.  */
  const void *host_table;
  /* Opaque target-side data for the image.  */
  const void *target_data;
};
65 1.1 mrg
66 1.1 mrg /* Array of descriptors of offload images. */
67 1.1 mrg static struct offload_image_descr *offload_images;
68 1.1 mrg
69 1.1 mrg /* Total number of offload images. */
70 1.1 mrg static int num_offload_images;
71 1.1 mrg
72 1.1 mrg /* Array of descriptors for all available devices. */
73 1.1 mrg static struct gomp_device_descr *devices;
74 1.1 mrg
75 1.1 mrg /* Total number of available devices. */
76 1.1 mrg static int num_devices;
77 1.1 mrg
78 1.1 mrg /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79 1.1 mrg static int num_devices_openmp;
80 1.1 mrg
81 1.1 mrg /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82 1.1 mrg
83 1.1 mrg static void *
84 1.1 mrg gomp_realloc_unlock (void *old, size_t size)
85 1.1 mrg {
86 1.1 mrg void *ret = realloc (old, size);
87 1.1 mrg if (ret == NULL)
88 1.1 mrg {
89 1.1 mrg gomp_mutex_unlock (®ister_lock);
90 1.1 mrg gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91 1.1 mrg }
92 1.1 mrg return ret;
93 1.1 mrg }
94 1.1 mrg
/* Run the offloading plugin initialization (gomp_target_init) exactly once,
   even when called concurrently from multiple threads.  */

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
100 1.1 mrg
/* Return the number of GOMP_OFFLOAD_CAP_OPENMP_400 capable devices,
   initializing the plugin infrastructure on first use.  */

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
107 1.1 mrg
/* Map DEVICE_ID to its device descriptor, lazily initializing the device
   on first use.  GOMP_DEVICE_ICV means "use the default-device-var ICV".
   Returns NULL for out-of-range ids and for devices that have already
   been finalized.  */

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  /* Initialize under the per-device lock; a finalized device can no
     longer be used, so report it as unavailable.  */
  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
132 1.1 mrg
133 1.1 mrg
/* Look KEY up in MEM_MAP.  A non-empty range uses a plain splay tree
   lookup.  For a zero-length range (host_start == host_end), first try
   to match a mapping containing the byte at host_start, then one ending
   exactly at host_start, and finally fall back to the exact lookup.
   KEY is temporarily adjusted but always restored before returning.  */

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* Widen to [host_start, host_start + 1).  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* Try [host_start - 1, host_start).  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
152 1.1.1.2 mrg
153 1.1.1.2 mrg static inline splay_tree_key
154 1.1.1.2 mrg gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155 1.1.1.2 mrg {
156 1.1.1.2 mrg if (key->host_start != key->host_end)
157 1.1.1.2 mrg return splay_tree_lookup (mem_map, key);
158 1.1.1.2 mrg
159 1.1.1.2 mrg key->host_end++;
160 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, key);
161 1.1.1.2 mrg key->host_end--;
162 1.1.1.2 mrg return n;
163 1.1.1.2 mrg }
164 1.1.1.2 mrg
/* Copy SIZE bytes from SRCADDR to DSTADDR through COPY_FUNC (one of the
   plugin's host2dev/dev2host hooks).  DST and SRC name the two sides
   ("host"/"dev") for the error message.  On failure, releases
   devicep->lock (which the caller therefore must hold) and aborts via
   gomp_fatal.  */

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}
179 1.1.1.3 mrg
180 1.1.1.6 mrg /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
181 1.1.1.6 mrg host to device memory transfers. */
182 1.1.1.6 mrg
/* One coalesced range of device memory, expressed as offsets relative to
   the target block's tgt_start.  */
struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};
188 1.1.1.7 mrg
/* Staging state used to batch several small host-to-device transfers
   into fewer large ones.  */
struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* Target block whose tgt_start the chunk offsets are relative to.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};
207 1.1.1.6 mrg
208 1.1.1.6 mrg /* Maximum size of memory region considered for coalescing. Larger copies
209 1.1.1.6 mrg are performed directly. */
210 1.1.1.6 mrg #define MAX_COALESCE_BUF_SIZE (32 * 1024)
211 1.1.1.6 mrg
212 1.1.1.6 mrg /* Maximum size of a gap in between regions to consider them being copied
213 1.1.1.6 mrg within the same chunk. All the device offsets considered are within
214 1.1.1.6 mrg newly allocated device memory, so it isn't fatal if we copy some padding
215 1.1.1.6 mrg in between from host to device. The gaps come either from alignment
216 1.1.1.6 mrg padding or from memory regions which are not supposed to be copied from
217 1.1.1.6 mrg host to device (e.g. map(alloc:), map(from:) etc.). */
218 1.1.1.6 mrg #define MAX_COALESCE_BUF_GAP (4 * 1024)
219 1.1.1.6 mrg
/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  /* Empty regions have nothing to copy; oversized ones are transferred
     directly, bypassing the staging buffer.  */
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      /* chunk_cnt < 0 means coalescing has already been abandoned.  */
      if (cbuf->chunk_cnt < 0)
	return;
      /* Regions must arrive in increasing device-offset order; anything
	 overlapping or out of order disables coalescing entirely.  */
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      /* Close enough to the previous chunk: extend it, since copying the
	 small gap is cheaper than an extra transfer.  */
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  /* Open a new chunk covering [start, start + len).  */
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
253 1.1.1.6 mrg
254 1.1.1.6 mrg /* Return true for mapping kinds which need to copy data from the
255 1.1.1.6 mrg host to device for regions that weren't previously mapped. */
256 1.1.1.6 mrg
257 1.1.1.6 mrg static inline bool
258 1.1.1.6 mrg gomp_to_device_kind_p (int kind)
259 1.1.1.6 mrg {
260 1.1.1.6 mrg switch (kind)
261 1.1.1.6 mrg {
262 1.1.1.6 mrg case GOMP_MAP_ALLOC:
263 1.1.1.6 mrg case GOMP_MAP_FROM:
264 1.1.1.6 mrg case GOMP_MAP_FORCE_ALLOC:
265 1.1.1.6 mrg case GOMP_MAP_ALWAYS_FROM:
266 1.1.1.6 mrg return false;
267 1.1.1.6 mrg default:
268 1.1.1.6 mrg return true;
269 1.1.1.6 mrg }
270 1.1.1.6 mrg }
271 1.1.1.6 mrg
/* Copy SZ bytes from host address H to device address D.  If CBUF is
   non-NULL and D falls within one of its coalesced chunks, only memcpy
   the data into the staging buffer (it is transferred to the device
   later in one shot); otherwise do a direct host-to-device copy.  */

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      /* Device offset of D relative to the target block.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF; the chunks
	     array is sorted and non-overlapping.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* The region must lie entirely inside one chunk.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  /* BUF starts at the offset of chunks[0].start.  */
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
304 1.1.1.3 mrg
/* Copy SZ bytes from device address D back to host address H.  */

static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
311 1.1.1.3 mrg
/* Free device memory block DEVPTR on DEVICEP via the plugin's free hook.
   On failure, releases devicep->lock (so the caller must hold it) and
   aborts via gomp_fatal.  */

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}
321 1.1.1.3 mrg
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  /* Record the pre-existing mapping in the new target_var_desc.  */
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  /* A force mapping, or a new region not fully contained in the old one,
     is an error; unlock first so gomp_fatal doesn't leave the lock held.  */
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  /* "always, to" mappings refresh the device copy even though the region
     is already mapped.  */
  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
358 1.1 mrg
/* Fetch map-kind number IDX from KINDS, which is an array of unsigned
   shorts when SHORT_MAPKIND is true and of unsigned chars otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
365 1.1.1.2 mrg
/* Store into the target pointer slot at TGT->tgt_start + TARGET_OFFSET
   the device address corresponding to host pointer HOST_PTR.  BIAS is
   added to the host pointer before the lookup and subtracted from the
   resulting device address (array-section bias).  A NULL host pointer
   stores NULL.  The pointer target must already be mapped, else fatal.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* NULL on the host maps to NULL on the device.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
404 1.1.1.2 mrg
/* Map structure field number I when the parent structure mapping N
   already exists.  FIRST is the index of the first field of the struct
   in HOSTADDRS/SIZES/KINDS.  The field must map into the same target
   block as N at the matching offset, otherwise it is a fatal error.
   Helper function of gomp_map_vars.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  /* GOMP_MAP_STRUCT is only generated with short (16-bit) map kinds.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  /* The field mapping is valid only if it lives in the same target block
     at the same relative offset as the parent mapping N.  */
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-length array section: retry one byte before the start
	 (unless that would fall before the struct's first field)...  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask, cbuf);
	      return;
	    }
	}
      /* ... and one byte past the end.  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
465 1.1.1.2 mrg
/* Return the device address to use for list entry I of TGT.  The offset
   field of an entry with no key encodes special cases:
     ~0 — pass the host address through unchanged;
     ~1 — pass NULL / 0;
     ~2 — derive from the following entry's mapping (struct handling).  */

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  /* Already-mapped variable: old block's base plus this entry's offset.  */
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    /* Struct base: next entry's device address adjusted by the host
       distance between this entry and the next.  */
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  /* Otherwise the variable was newly allocated within this block.  */
  return tgt->tgt_start + tgt->list[i].offset;
}
485 1.1 mrg
486 1.1 mrg attribute_hidden struct target_mem_desc *
487 1.1 mrg gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
488 1.1 mrg void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
489 1.1.1.2 mrg bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
490 1.1 mrg {
491 1.1 mrg size_t i, tgt_align, tgt_size, not_found_cnt = 0;
492 1.1.1.2 mrg bool has_firstprivate = false;
493 1.1.1.2 mrg const int rshift = short_mapkind ? 8 : 3;
494 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
495 1.1 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
496 1.1 mrg struct splay_tree_key_s cur_node;
497 1.1 mrg struct target_mem_desc *tgt
498 1.1 mrg = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
499 1.1 mrg tgt->list_count = mapnum;
500 1.1.1.2 mrg tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
501 1.1 mrg tgt->device_descr = devicep;
502 1.1.1.6 mrg struct gomp_coalesce_buf cbuf, *cbufp = NULL;
503 1.1 mrg
504 1.1 mrg if (mapnum == 0)
505 1.1.1.2 mrg {
506 1.1.1.2 mrg tgt->tgt_start = 0;
507 1.1.1.2 mrg tgt->tgt_end = 0;
508 1.1.1.2 mrg return tgt;
509 1.1.1.2 mrg }
510 1.1 mrg
511 1.1 mrg tgt_align = sizeof (void *);
512 1.1 mrg tgt_size = 0;
513 1.1.1.6 mrg cbuf.chunks = NULL;
514 1.1.1.6 mrg cbuf.chunk_cnt = -1;
515 1.1.1.6 mrg cbuf.use_cnt = 0;
516 1.1.1.6 mrg cbuf.buf = NULL;
517 1.1.1.6 mrg if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
518 1.1.1.6 mrg {
519 1.1.1.7 mrg size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
520 1.1.1.7 mrg cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
521 1.1.1.6 mrg cbuf.chunk_cnt = 0;
522 1.1.1.6 mrg }
523 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
524 1.1 mrg {
525 1.1 mrg size_t align = 4 * sizeof (void *);
526 1.1 mrg tgt_align = align;
527 1.1 mrg tgt_size = mapnum * sizeof (void *);
528 1.1.1.6 mrg cbuf.chunk_cnt = 1;
529 1.1.1.6 mrg cbuf.use_cnt = 1 + (mapnum > 1);
530 1.1.1.7 mrg cbuf.chunks[0].start = 0;
531 1.1.1.7 mrg cbuf.chunks[0].end = tgt_size;
532 1.1 mrg }
533 1.1 mrg
534 1.1 mrg gomp_mutex_lock (&devicep->lock);
535 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
536 1.1.1.2 mrg {
537 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
538 1.1.1.2 mrg free (tgt);
539 1.1.1.2 mrg return NULL;
540 1.1.1.2 mrg }
541 1.1 mrg
542 1.1 mrg for (i = 0; i < mapnum; i++)
543 1.1 mrg {
544 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
545 1.1.1.2 mrg if (hostaddrs[i] == NULL
546 1.1.1.2 mrg || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
547 1.1.1.2 mrg {
548 1.1.1.2 mrg tgt->list[i].key = NULL;
549 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 0;
550 1.1.1.2 mrg continue;
551 1.1.1.2 mrg }
552 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
553 1.1.1.2 mrg {
554 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
555 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
556 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
557 1.1.1.2 mrg if (n == NULL)
558 1.1.1.2 mrg {
559 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
560 1.1.1.2 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped");
561 1.1.1.2 mrg }
562 1.1.1.2 mrg cur_node.host_start -= n->host_start;
563 1.1.1.2 mrg hostaddrs[i]
564 1.1.1.2 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset
565 1.1.1.2 mrg + cur_node.host_start);
566 1.1.1.2 mrg tgt->list[i].key = NULL;
567 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 0;
568 1.1.1.2 mrg continue;
569 1.1.1.2 mrg }
570 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_STRUCT)
571 1.1.1.2 mrg {
572 1.1.1.2 mrg size_t first = i + 1;
573 1.1.1.2 mrg size_t last = i + sizes[i];
574 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
575 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
576 1.1.1.2 mrg + sizes[last];
577 1.1.1.2 mrg tgt->list[i].key = NULL;
578 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 2;
579 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
580 1.1.1.2 mrg if (n == NULL)
581 1.1.1.2 mrg {
582 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
583 1.1.1.2 mrg if (tgt_align < align)
584 1.1.1.2 mrg tgt_align = align;
585 1.1.1.6 mrg tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
586 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
587 1.1.1.6 mrg tgt_size += cur_node.host_end - cur_node.host_start;
588 1.1.1.2 mrg not_found_cnt += last - i;
589 1.1.1.2 mrg for (i = first; i <= last; i++)
590 1.1.1.6 mrg {
591 1.1.1.6 mrg tgt->list[i].key = NULL;
592 1.1.1.6 mrg if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
593 1.1.1.6 mrg & typemask))
594 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf,
595 1.1.1.6 mrg tgt_size - cur_node.host_end
596 1.1.1.6 mrg + (uintptr_t) hostaddrs[i],
597 1.1.1.6 mrg sizes[i]);
598 1.1.1.6 mrg }
599 1.1.1.2 mrg i--;
600 1.1.1.2 mrg continue;
601 1.1.1.2 mrg }
602 1.1.1.2 mrg for (i = first; i <= last; i++)
603 1.1.1.2 mrg gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
604 1.1.1.6 mrg sizes, kinds, NULL);
605 1.1.1.2 mrg i--;
606 1.1.1.2 mrg continue;
607 1.1.1.2 mrg }
608 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
609 1.1 mrg {
610 1.1.1.2 mrg tgt->list[i].key = NULL;
611 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 1;
612 1.1.1.2 mrg has_firstprivate = true;
613 1.1 mrg continue;
614 1.1 mrg }
615 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
616 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
617 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
618 1.1 mrg else
619 1.1 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
620 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
621 1.1.1.2 mrg {
622 1.1.1.2 mrg tgt->list[i].key = NULL;
623 1.1.1.2 mrg
624 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
625 1.1.1.2 mrg if (tgt_align < align)
626 1.1.1.2 mrg tgt_align = align;
627 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
628 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf, tgt_size,
629 1.1.1.6 mrg cur_node.host_end - cur_node.host_start);
630 1.1.1.2 mrg tgt_size += cur_node.host_end - cur_node.host_start;
631 1.1.1.2 mrg has_firstprivate = true;
632 1.1.1.2 mrg continue;
633 1.1.1.2 mrg }
634 1.1.1.2 mrg splay_tree_key n;
635 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
636 1.1 mrg {
637 1.1.1.2 mrg n = gomp_map_0len_lookup (mem_map, &cur_node);
638 1.1.1.2 mrg if (!n)
639 1.1.1.2 mrg {
640 1.1.1.2 mrg tgt->list[i].key = NULL;
641 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 1;
642 1.1.1.2 mrg continue;
643 1.1.1.2 mrg }
644 1.1 mrg }
645 1.1 mrg else
646 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
647 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
648 1.1.1.2 mrg gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
649 1.1.1.6 mrg kind & typemask, NULL);
650 1.1.1.2 mrg else
651 1.1 mrg {
652 1.1.1.2 mrg tgt->list[i].key = NULL;
653 1.1 mrg
654 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
655 1.1 mrg not_found_cnt++;
656 1.1 mrg if (tgt_align < align)
657 1.1 mrg tgt_align = align;
658 1.1 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
659 1.1.1.6 mrg if (gomp_to_device_kind_p (kind & typemask))
660 1.1.1.6 mrg gomp_coalesce_buf_add (&cbuf, tgt_size,
661 1.1.1.6 mrg cur_node.host_end - cur_node.host_start);
662 1.1 mrg tgt_size += cur_node.host_end - cur_node.host_start;
663 1.1 mrg if ((kind & typemask) == GOMP_MAP_TO_PSET)
664 1.1 mrg {
665 1.1 mrg size_t j;
666 1.1 mrg for (j = i + 1; j < mapnum; j++)
667 1.1.1.2 mrg if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
668 1.1 mrg & typemask))
669 1.1 mrg break;
670 1.1 mrg else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
671 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
672 1.1 mrg > cur_node.host_end))
673 1.1 mrg break;
674 1.1 mrg else
675 1.1 mrg {
676 1.1.1.2 mrg tgt->list[j].key = NULL;
677 1.1 mrg i++;
678 1.1 mrg }
679 1.1 mrg }
680 1.1 mrg }
681 1.1 mrg }
682 1.1 mrg
683 1.1 mrg if (devaddrs)
684 1.1 mrg {
685 1.1 mrg if (mapnum != 1)
686 1.1 mrg {
687 1.1 mrg gomp_mutex_unlock (&devicep->lock);
688 1.1 mrg gomp_fatal ("unexpected aggregation");
689 1.1 mrg }
690 1.1 mrg tgt->to_free = devaddrs[0];
691 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
692 1.1 mrg tgt->tgt_end = tgt->tgt_start + sizes[0];
693 1.1 mrg }
694 1.1.1.2 mrg else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
695 1.1 mrg {
696 1.1 mrg /* Allocate tgt_align aligned tgt_size block of memory. */
697 1.1 mrg /* FIXME: Perhaps change interface to allocate properly aligned
698 1.1 mrg memory. */
699 1.1 mrg tgt->to_free = devicep->alloc_func (devicep->target_id,
700 1.1 mrg tgt_size + tgt_align - 1);
701 1.1.1.3 mrg if (!tgt->to_free)
702 1.1.1.3 mrg {
703 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
704 1.1.1.3 mrg gomp_fatal ("device memory allocation fail");
705 1.1.1.3 mrg }
706 1.1.1.3 mrg
707 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
708 1.1 mrg tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
709 1.1 mrg tgt->tgt_end = tgt->tgt_start + tgt_size;
710 1.1.1.6 mrg
711 1.1.1.6 mrg if (cbuf.use_cnt == 1)
712 1.1.1.6 mrg cbuf.chunk_cnt--;
713 1.1.1.6 mrg if (cbuf.chunk_cnt > 0)
714 1.1.1.6 mrg {
715 1.1.1.6 mrg cbuf.buf
716 1.1.1.7 mrg = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
717 1.1.1.6 mrg if (cbuf.buf)
718 1.1.1.6 mrg {
719 1.1.1.6 mrg cbuf.tgt = tgt;
720 1.1.1.6 mrg cbufp = &cbuf;
721 1.1.1.6 mrg }
722 1.1.1.6 mrg }
723 1.1 mrg }
724 1.1 mrg else
725 1.1 mrg {
726 1.1 mrg tgt->to_free = NULL;
727 1.1 mrg tgt->tgt_start = 0;
728 1.1 mrg tgt->tgt_end = 0;
729 1.1 mrg }
730 1.1 mrg
731 1.1 mrg tgt_size = 0;
732 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
733 1.1 mrg tgt_size = mapnum * sizeof (void *);
734 1.1 mrg
735 1.1 mrg tgt->array = NULL;
736 1.1.1.2 mrg if (not_found_cnt || has_firstprivate)
737 1.1 mrg {
738 1.1.1.2 mrg if (not_found_cnt)
739 1.1.1.2 mrg tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
740 1.1 mrg splay_tree_node array = tgt->array;
741 1.1.1.2 mrg size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
742 1.1.1.2 mrg uintptr_t field_tgt_base = 0;
743 1.1 mrg
744 1.1 mrg for (i = 0; i < mapnum; i++)
745 1.1.1.2 mrg if (tgt->list[i].key == NULL)
746 1.1 mrg {
747 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
748 1.1 mrg if (hostaddrs[i] == NULL)
749 1.1 mrg continue;
750 1.1.1.2 mrg switch (kind & typemask)
751 1.1.1.2 mrg {
752 1.1.1.2 mrg size_t align, len, first, last;
753 1.1.1.2 mrg splay_tree_key n;
754 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE:
755 1.1.1.2 mrg align = (size_t) 1 << (kind >> rshift);
756 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
757 1.1.1.2 mrg tgt->list[i].offset = tgt_size;
758 1.1.1.2 mrg len = sizes[i];
759 1.1.1.3 mrg gomp_copy_host2dev (devicep,
760 1.1.1.3 mrg (void *) (tgt->tgt_start + tgt_size),
761 1.1.1.6 mrg (void *) hostaddrs[i], len, cbufp);
762 1.1.1.2 mrg tgt_size += len;
763 1.1.1.2 mrg continue;
764 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE_INT:
765 1.1.1.2 mrg case GOMP_MAP_USE_DEVICE_PTR:
766 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
767 1.1.1.2 mrg continue;
768 1.1.1.2 mrg case GOMP_MAP_STRUCT:
769 1.1.1.2 mrg first = i + 1;
770 1.1.1.2 mrg last = i + sizes[i];
771 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
772 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
773 1.1.1.2 mrg + sizes[last];
774 1.1.1.2 mrg if (tgt->list[first].key != NULL)
775 1.1.1.2 mrg continue;
776 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
777 1.1.1.2 mrg if (n == NULL)
778 1.1.1.2 mrg {
779 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
780 1.1.1.2 mrg tgt_size -= (uintptr_t) hostaddrs[first]
781 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
782 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
783 1.1.1.2 mrg tgt_size += (uintptr_t) hostaddrs[first]
784 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
785 1.1.1.2 mrg field_tgt_base = (uintptr_t) hostaddrs[first];
786 1.1.1.2 mrg field_tgt_offset = tgt_size;
787 1.1.1.2 mrg field_tgt_clear = last;
788 1.1.1.2 mrg tgt_size += cur_node.host_end
789 1.1.1.2 mrg - (uintptr_t) hostaddrs[first];
790 1.1.1.2 mrg continue;
791 1.1.1.2 mrg }
792 1.1.1.2 mrg for (i = first; i <= last; i++)
793 1.1.1.2 mrg gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
794 1.1.1.6 mrg sizes, kinds, cbufp);
795 1.1.1.2 mrg i--;
796 1.1.1.2 mrg continue;
797 1.1.1.2 mrg case GOMP_MAP_ALWAYS_POINTER:
798 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
799 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
800 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
801 1.1.1.2 mrg if (n == NULL
802 1.1.1.2 mrg || n->host_start > cur_node.host_start
803 1.1.1.2 mrg || n->host_end < cur_node.host_end)
804 1.1.1.2 mrg {
805 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
806 1.1.1.2 mrg gomp_fatal ("always pointer not mapped");
807 1.1.1.2 mrg }
808 1.1.1.2 mrg if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
809 1.1.1.2 mrg != GOMP_MAP_ALWAYS_POINTER)
810 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
811 1.1.1.2 mrg if (cur_node.tgt_offset)
812 1.1.1.2 mrg cur_node.tgt_offset -= sizes[i];
813 1.1.1.3 mrg gomp_copy_host2dev (devicep,
814 1.1.1.3 mrg (void *) (n->tgt->tgt_start
815 1.1.1.3 mrg + n->tgt_offset
816 1.1.1.3 mrg + cur_node.host_start
817 1.1.1.3 mrg - n->host_start),
818 1.1.1.3 mrg (void *) &cur_node.tgt_offset,
819 1.1.1.6 mrg sizeof (void *), cbufp);
820 1.1.1.2 mrg cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
821 1.1.1.2 mrg + cur_node.host_start - n->host_start;
822 1.1.1.2 mrg continue;
823 1.1.1.2 mrg default:
824 1.1.1.2 mrg break;
825 1.1.1.2 mrg }
826 1.1 mrg splay_tree_key k = &array->key;
827 1.1 mrg k->host_start = (uintptr_t) hostaddrs[i];
828 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
829 1.1 mrg k->host_end = k->host_start + sizes[i];
830 1.1 mrg else
831 1.1 mrg k->host_end = k->host_start + sizeof (void *);
832 1.1 mrg splay_tree_key n = splay_tree_lookup (mem_map, k);
833 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
834 1.1.1.2 mrg gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
835 1.1.1.6 mrg kind & typemask, cbufp);
836 1.1 mrg else
837 1.1 mrg {
838 1.1.1.2 mrg k->link_key = NULL;
839 1.1.1.2 mrg if (n && n->refcount == REFCOUNT_LINK)
840 1.1.1.2 mrg {
841 1.1.1.2 mrg /* Replace target address of the pointer with target address
842 1.1.1.2 mrg of mapped object in the splay tree. */
843 1.1.1.2 mrg splay_tree_remove (mem_map, n);
844 1.1.1.2 mrg k->link_key = n;
845 1.1.1.2 mrg }
846 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
847 1.1.1.2 mrg tgt->list[i].key = k;
848 1.1 mrg k->tgt = tgt;
849 1.1.1.2 mrg if (field_tgt_clear != ~(size_t) 0)
850 1.1.1.2 mrg {
851 1.1.1.2 mrg k->tgt_offset = k->host_start - field_tgt_base
852 1.1.1.2 mrg + field_tgt_offset;
853 1.1.1.2 mrg if (i == field_tgt_clear)
854 1.1.1.2 mrg field_tgt_clear = ~(size_t) 0;
855 1.1.1.2 mrg }
856 1.1.1.2 mrg else
857 1.1.1.2 mrg {
858 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
859 1.1.1.2 mrg k->tgt_offset = tgt_size;
860 1.1.1.2 mrg tgt_size += k->host_end - k->host_start;
861 1.1.1.2 mrg }
862 1.1.1.2 mrg tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
863 1.1.1.2 mrg tgt->list[i].always_copy_from
864 1.1.1.2 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
865 1.1.1.2 mrg tgt->list[i].offset = 0;
866 1.1.1.2 mrg tgt->list[i].length = k->host_end - k->host_start;
867 1.1 mrg k->refcount = 1;
868 1.1.1.7 mrg k->dynamic_refcount = 0;
869 1.1 mrg tgt->refcount++;
870 1.1 mrg array->left = NULL;
871 1.1 mrg array->right = NULL;
872 1.1 mrg splay_tree_insert (mem_map, array);
873 1.1 mrg switch (kind & typemask)
874 1.1 mrg {
875 1.1 mrg case GOMP_MAP_ALLOC:
876 1.1 mrg case GOMP_MAP_FROM:
877 1.1 mrg case GOMP_MAP_FORCE_ALLOC:
878 1.1 mrg case GOMP_MAP_FORCE_FROM:
879 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
880 1.1 mrg break;
881 1.1 mrg case GOMP_MAP_TO:
882 1.1 mrg case GOMP_MAP_TOFROM:
883 1.1 mrg case GOMP_MAP_FORCE_TO:
884 1.1 mrg case GOMP_MAP_FORCE_TOFROM:
885 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TO:
886 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TOFROM:
887 1.1.1.3 mrg gomp_copy_host2dev (devicep,
888 1.1.1.3 mrg (void *) (tgt->tgt_start
889 1.1.1.3 mrg + k->tgt_offset),
890 1.1.1.3 mrg (void *) k->host_start,
891 1.1.1.6 mrg k->host_end - k->host_start, cbufp);
892 1.1 mrg break;
893 1.1 mrg case GOMP_MAP_POINTER:
894 1.1.1.2 mrg gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
895 1.1.1.6 mrg k->tgt_offset, sizes[i], cbufp);
896 1.1 mrg break;
897 1.1 mrg case GOMP_MAP_TO_PSET:
898 1.1.1.3 mrg gomp_copy_host2dev (devicep,
899 1.1.1.3 mrg (void *) (tgt->tgt_start
900 1.1.1.3 mrg + k->tgt_offset),
901 1.1.1.3 mrg (void *) k->host_start,
902 1.1.1.6 mrg k->host_end - k->host_start, cbufp);
903 1.1 mrg
904 1.1 mrg for (j = i + 1; j < mapnum; j++)
905 1.1.1.2 mrg if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
906 1.1.1.2 mrg j)
907 1.1 mrg & typemask))
908 1.1 mrg break;
909 1.1 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start
910 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
911 1.1 mrg > k->host_end))
912 1.1 mrg break;
913 1.1 mrg else
914 1.1 mrg {
915 1.1.1.2 mrg tgt->list[j].key = k;
916 1.1.1.2 mrg tgt->list[j].copy_from = false;
917 1.1.1.2 mrg tgt->list[j].always_copy_from = false;
918 1.1.1.2 mrg if (k->refcount != REFCOUNT_INFINITY)
919 1.1.1.2 mrg k->refcount++;
920 1.1.1.2 mrg gomp_map_pointer (tgt,
921 1.1.1.2 mrg (uintptr_t) *(void **) hostaddrs[j],
922 1.1.1.2 mrg k->tgt_offset
923 1.1.1.2 mrg + ((uintptr_t) hostaddrs[j]
924 1.1.1.2 mrg - k->host_start),
925 1.1.1.6 mrg sizes[j], cbufp);
926 1.1 mrg i++;
927 1.1 mrg }
928 1.1 mrg break;
929 1.1 mrg case GOMP_MAP_FORCE_PRESENT:
930 1.1 mrg {
931 1.1 mrg /* We already looked up the memory region above and it
932 1.1 mrg was missing. */
933 1.1 mrg size_t size = k->host_end - k->host_start;
934 1.1 mrg gomp_mutex_unlock (&devicep->lock);
935 1.1 mrg #ifdef HAVE_INTTYPES_H
936 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
937 1.1 mrg "%"PRIu64" (0x%"PRIx64"))",
938 1.1 mrg (void *) k->host_start,
939 1.1 mrg (uint64_t) size, (uint64_t) size);
940 1.1 mrg #else
941 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
942 1.1 mrg "%lu (0x%lx))", (void *) k->host_start,
943 1.1 mrg (unsigned long) size, (unsigned long) size);
944 1.1 mrg #endif
945 1.1 mrg }
946 1.1 mrg break;
947 1.1 mrg case GOMP_MAP_FORCE_DEVICEPTR:
948 1.1 mrg assert (k->host_end - k->host_start == sizeof (void *));
949 1.1.1.3 mrg gomp_copy_host2dev (devicep,
950 1.1.1.3 mrg (void *) (tgt->tgt_start
951 1.1.1.3 mrg + k->tgt_offset),
952 1.1.1.3 mrg (void *) k->host_start,
953 1.1.1.6 mrg sizeof (void *), cbufp);
954 1.1 mrg break;
955 1.1 mrg default:
956 1.1 mrg gomp_mutex_unlock (&devicep->lock);
957 1.1 mrg gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
958 1.1 mrg kind);
959 1.1 mrg }
960 1.1.1.2 mrg
961 1.1.1.2 mrg if (k->link_key)
962 1.1.1.2 mrg {
963 1.1.1.2 mrg /* Set link pointer on target to the device address of the
964 1.1.1.2 mrg mapped object. */
965 1.1.1.2 mrg void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
966 1.1.1.7 mrg /* We intentionally do not use coalescing here, as it's not
967 1.1.1.7 mrg data allocated by the current call to this function. */
968 1.1.1.7 mrg gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
969 1.1.1.7 mrg &tgt_addr, sizeof (void *), NULL);
970 1.1.1.2 mrg }
971 1.1 mrg array++;
972 1.1 mrg }
973 1.1 mrg }
974 1.1 mrg }
975 1.1 mrg
976 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
977 1.1 mrg {
978 1.1 mrg for (i = 0; i < mapnum; i++)
979 1.1 mrg {
980 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
981 1.1.1.3 mrg gomp_copy_host2dev (devicep,
982 1.1.1.3 mrg (void *) (tgt->tgt_start + i * sizeof (void *)),
983 1.1.1.6 mrg (void *) &cur_node.tgt_offset, sizeof (void *),
984 1.1.1.6 mrg cbufp);
985 1.1 mrg }
986 1.1 mrg }
987 1.1 mrg
988 1.1.1.6 mrg if (cbufp)
989 1.1.1.6 mrg {
990 1.1.1.6 mrg long c = 0;
991 1.1.1.6 mrg for (c = 0; c < cbuf.chunk_cnt; ++c)
992 1.1.1.7 mrg gomp_copy_host2dev (devicep,
993 1.1.1.7 mrg (void *) (tgt->tgt_start + cbuf.chunks[c].start),
994 1.1.1.7 mrg (char *) cbuf.buf + (cbuf.chunks[c].start
995 1.1.1.7 mrg - cbuf.chunks[0].start),
996 1.1.1.7 mrg cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
997 1.1.1.6 mrg free (cbuf.buf);
998 1.1.1.7 mrg cbuf.buf = NULL;
999 1.1.1.7 mrg cbufp = NULL;
1000 1.1.1.6 mrg }
1001 1.1.1.6 mrg
1002 1.1.1.2 mrg /* If the variable from "omp target enter data" map-list was already mapped,
1003 1.1.1.2 mrg tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1004 1.1.1.2 mrg gomp_exit_data. */
1005 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
1006 1.1.1.2 mrg {
1007 1.1.1.2 mrg free (tgt);
1008 1.1.1.2 mrg tgt = NULL;
1009 1.1.1.2 mrg }
1010 1.1.1.2 mrg
1011 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1012 1.1 mrg return tgt;
1013 1.1 mrg }
1014 1.1 mrg
1015 1.1 mrg static void
1016 1.1 mrg gomp_unmap_tgt (struct target_mem_desc *tgt)
1017 1.1 mrg {
1018 1.1 mrg /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1019 1.1 mrg if (tgt->tgt_end)
1020 1.1.1.3 mrg gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1021 1.1 mrg
1022 1.1 mrg free (tgt->array);
1023 1.1 mrg free (tgt);
1024 1.1 mrg }
1025 1.1 mrg
1026 1.1.1.7 mrg attribute_hidden bool
1027 1.1.1.7 mrg gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1028 1.1.1.7 mrg {
1029 1.1.1.7 mrg bool is_tgt_unmapped = false;
1030 1.1.1.7 mrg splay_tree_remove (&devicep->mem_map, k);
1031 1.1.1.7 mrg if (k->link_key)
1032 1.1.1.7 mrg splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
1033 1.1.1.7 mrg if (k->tgt->refcount > 1)
1034 1.1.1.7 mrg k->tgt->refcount--;
1035 1.1.1.7 mrg else
1036 1.1.1.7 mrg {
1037 1.1.1.7 mrg is_tgt_unmapped = true;
1038 1.1.1.7 mrg gomp_unmap_tgt (k->tgt);
1039 1.1.1.7 mrg }
1040 1.1.1.7 mrg return is_tgt_unmapped;
1041 1.1.1.7 mrg }
1042 1.1.1.7 mrg
1043 1.1 mrg /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1044 1.1 mrg variables back from device to host: if it is false, it is assumed that this
1045 1.1.1.3 mrg has been done already. */
1046 1.1 mrg
1047 1.1 mrg attribute_hidden void
1048 1.1 mrg gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1049 1.1 mrg {
1050 1.1 mrg struct gomp_device_descr *devicep = tgt->device_descr;
1051 1.1 mrg
1052 1.1 mrg if (tgt->list_count == 0)
1053 1.1 mrg {
1054 1.1 mrg free (tgt);
1055 1.1 mrg return;
1056 1.1 mrg }
1057 1.1 mrg
1058 1.1 mrg gomp_mutex_lock (&devicep->lock);
1059 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
1060 1.1.1.2 mrg {
1061 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1062 1.1.1.2 mrg free (tgt->array);
1063 1.1.1.2 mrg free (tgt);
1064 1.1.1.2 mrg return;
1065 1.1.1.2 mrg }
1066 1.1 mrg
1067 1.1 mrg size_t i;
1068 1.1 mrg for (i = 0; i < tgt->list_count; i++)
1069 1.1.1.2 mrg {
1070 1.1.1.2 mrg splay_tree_key k = tgt->list[i].key;
1071 1.1.1.2 mrg if (k == NULL)
1072 1.1.1.2 mrg continue;
1073 1.1.1.2 mrg
1074 1.1.1.2 mrg bool do_unmap = false;
1075 1.1.1.2 mrg if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1076 1.1.1.2 mrg k->refcount--;
1077 1.1.1.2 mrg else if (k->refcount == 1)
1078 1.1.1.2 mrg {
1079 1.1.1.3 mrg k->refcount--;
1080 1.1.1.3 mrg do_unmap = true;
1081 1.1.1.2 mrg }
1082 1.1.1.2 mrg
1083 1.1.1.2 mrg if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1084 1.1.1.2 mrg || tgt->list[i].always_copy_from)
1085 1.1.1.3 mrg gomp_copy_dev2host (devicep,
1086 1.1.1.3 mrg (void *) (k->host_start + tgt->list[i].offset),
1087 1.1.1.3 mrg (void *) (k->tgt->tgt_start + k->tgt_offset
1088 1.1.1.3 mrg + tgt->list[i].offset),
1089 1.1.1.3 mrg tgt->list[i].length);
1090 1.1.1.2 mrg if (do_unmap)
1091 1.1.1.7 mrg gomp_remove_var (devicep, k);
1092 1.1.1.2 mrg }
1093 1.1 mrg
1094 1.1 mrg if (tgt->refcount > 1)
1095 1.1 mrg tgt->refcount--;
1096 1.1 mrg else
1097 1.1 mrg gomp_unmap_tgt (tgt);
1098 1.1 mrg
1099 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1100 1.1 mrg }
1101 1.1 mrg
1102 1.1 mrg static void
1103 1.1 mrg gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1104 1.1.1.2 mrg size_t *sizes, void *kinds, bool short_mapkind)
1105 1.1 mrg {
1106 1.1 mrg size_t i;
1107 1.1 mrg struct splay_tree_key_s cur_node;
1108 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
1109 1.1 mrg
1110 1.1 mrg if (!devicep)
1111 1.1 mrg return;
1112 1.1 mrg
1113 1.1 mrg if (mapnum == 0)
1114 1.1 mrg return;
1115 1.1 mrg
1116 1.1 mrg gomp_mutex_lock (&devicep->lock);
1117 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
1118 1.1.1.2 mrg {
1119 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1120 1.1.1.2 mrg return;
1121 1.1.1.2 mrg }
1122 1.1.1.2 mrg
1123 1.1 mrg for (i = 0; i < mapnum; i++)
1124 1.1 mrg if (sizes[i])
1125 1.1 mrg {
1126 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1127 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
1128 1.1 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1129 1.1 mrg if (n)
1130 1.1 mrg {
1131 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
1132 1.1 mrg if (n->host_start > cur_node.host_start
1133 1.1 mrg || n->host_end < cur_node.host_end)
1134 1.1 mrg {
1135 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1136 1.1 mrg gomp_fatal ("Trying to update [%p..%p) object when "
1137 1.1 mrg "only [%p..%p) is mapped",
1138 1.1 mrg (void *) cur_node.host_start,
1139 1.1 mrg (void *) cur_node.host_end,
1140 1.1 mrg (void *) n->host_start,
1141 1.1 mrg (void *) n->host_end);
1142 1.1 mrg }
1143 1.1.1.3 mrg
1144 1.1.1.3 mrg
1145 1.1.1.3 mrg void *hostaddr = (void *) cur_node.host_start;
1146 1.1.1.3 mrg void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1147 1.1.1.3 mrg + cur_node.host_start - n->host_start);
1148 1.1.1.3 mrg size_t size = cur_node.host_end - cur_node.host_start;
1149 1.1.1.3 mrg
1150 1.1 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask))
1151 1.1.1.6 mrg gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
1152 1.1 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1153 1.1.1.3 mrg gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
1154 1.1 mrg }
1155 1.1 mrg }
1156 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1157 1.1 mrg }
1158 1.1 mrg
1159 1.1 mrg /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1160 1.1 mrg And insert to splay tree the mapping between addresses from HOST_TABLE and
1161 1.1.1.2 mrg from loaded target image. We rely in the host and device compiler
1162 1.1.1.2 mrg emitting variable and functions in the same order. */
1163 1.1 mrg
1164 1.1 mrg static void
1165 1.1.1.2 mrg gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1166 1.1.1.2 mrg const void *host_table, const void *target_data,
1167 1.1.1.2 mrg bool is_register_lock)
1168 1.1 mrg {
1169 1.1 mrg void **host_func_table = ((void ***) host_table)[0];
1170 1.1 mrg void **host_funcs_end = ((void ***) host_table)[1];
1171 1.1 mrg void **host_var_table = ((void ***) host_table)[2];
1172 1.1 mrg void **host_vars_end = ((void ***) host_table)[3];
1173 1.1 mrg
1174 1.1 mrg /* The func table contains only addresses, the var table contains addresses
1175 1.1 mrg and corresponding sizes. */
1176 1.1 mrg int num_funcs = host_funcs_end - host_func_table;
1177 1.1 mrg int num_vars = (host_vars_end - host_var_table) / 2;
1178 1.1 mrg
1179 1.1 mrg /* Load image to device and get target addresses for the image. */
1180 1.1 mrg struct addr_pair *target_table = NULL;
1181 1.1.1.2 mrg int i, num_target_entries;
1182 1.1.1.2 mrg
1183 1.1.1.2 mrg num_target_entries
1184 1.1.1.2 mrg = devicep->load_image_func (devicep->target_id, version,
1185 1.1.1.2 mrg target_data, &target_table);
1186 1.1 mrg
1187 1.1 mrg if (num_target_entries != num_funcs + num_vars)
1188 1.1 mrg {
1189 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1190 1.1 mrg if (is_register_lock)
1191 1.1 mrg gomp_mutex_unlock (®ister_lock);
1192 1.1.1.2 mrg gomp_fatal ("Cannot map target functions or variables"
1193 1.1.1.2 mrg " (expected %u, have %u)", num_funcs + num_vars,
1194 1.1.1.2 mrg num_target_entries);
1195 1.1 mrg }
1196 1.1 mrg
1197 1.1 mrg /* Insert host-target address mapping into splay tree. */
1198 1.1 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1199 1.1 mrg tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1200 1.1.1.2 mrg tgt->refcount = REFCOUNT_INFINITY;
1201 1.1 mrg tgt->tgt_start = 0;
1202 1.1 mrg tgt->tgt_end = 0;
1203 1.1 mrg tgt->to_free = NULL;
1204 1.1 mrg tgt->prev = NULL;
1205 1.1 mrg tgt->list_count = 0;
1206 1.1 mrg tgt->device_descr = devicep;
1207 1.1 mrg splay_tree_node array = tgt->array;
1208 1.1 mrg
1209 1.1 mrg for (i = 0; i < num_funcs; i++)
1210 1.1 mrg {
1211 1.1 mrg splay_tree_key k = &array->key;
1212 1.1 mrg k->host_start = (uintptr_t) host_func_table[i];
1213 1.1 mrg k->host_end = k->host_start + 1;
1214 1.1 mrg k->tgt = tgt;
1215 1.1 mrg k->tgt_offset = target_table[i].start;
1216 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
1217 1.1.1.7 mrg k->dynamic_refcount = 0;
1218 1.1.1.2 mrg k->link_key = NULL;
1219 1.1 mrg array->left = NULL;
1220 1.1 mrg array->right = NULL;
1221 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
1222 1.1 mrg array++;
1223 1.1 mrg }
1224 1.1 mrg
1225 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
1226 1.1.1.2 mrg "omp declare target link" variables. */
1227 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1228 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
1229 1.1.1.2 mrg
1230 1.1 mrg for (i = 0; i < num_vars; i++)
1231 1.1 mrg {
1232 1.1 mrg struct addr_pair *target_var = &target_table[num_funcs + i];
1233 1.1.1.2 mrg uintptr_t target_size = target_var->end - target_var->start;
1234 1.1.1.2 mrg
1235 1.1.1.2 mrg if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1236 1.1 mrg {
1237 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1238 1.1 mrg if (is_register_lock)
1239 1.1 mrg gomp_mutex_unlock (®ister_lock);
1240 1.1.1.2 mrg gomp_fatal ("Cannot map target variables (size mismatch)");
1241 1.1 mrg }
1242 1.1 mrg
1243 1.1 mrg splay_tree_key k = &array->key;
1244 1.1 mrg k->host_start = (uintptr_t) host_var_table[i * 2];
1245 1.1.1.2 mrg k->host_end
1246 1.1.1.2 mrg = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1247 1.1 mrg k->tgt = tgt;
1248 1.1 mrg k->tgt_offset = target_var->start;
1249 1.1.1.2 mrg k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1250 1.1.1.7 mrg k->dynamic_refcount = 0;
1251 1.1.1.2 mrg k->link_key = NULL;
1252 1.1 mrg array->left = NULL;
1253 1.1 mrg array->right = NULL;
1254 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
1255 1.1 mrg array++;
1256 1.1 mrg }
1257 1.1 mrg
1258 1.1 mrg free (target_table);
1259 1.1 mrg }
1260 1.1 mrg
1261 1.1.1.2 mrg /* Unload the mappings described by target_data from device DEVICE_P.
1262 1.1.1.2 mrg The device must be locked. */
1263 1.1.1.2 mrg
1264 1.1.1.2 mrg static void
1265 1.1.1.2 mrg gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1266 1.1.1.2 mrg unsigned version,
1267 1.1.1.2 mrg const void *host_table, const void *target_data)
1268 1.1.1.2 mrg {
1269 1.1.1.2 mrg void **host_func_table = ((void ***) host_table)[0];
1270 1.1.1.2 mrg void **host_funcs_end = ((void ***) host_table)[1];
1271 1.1.1.2 mrg void **host_var_table = ((void ***) host_table)[2];
1272 1.1.1.2 mrg void **host_vars_end = ((void ***) host_table)[3];
1273 1.1.1.2 mrg
1274 1.1.1.2 mrg /* The func table contains only addresses, the var table contains addresses
1275 1.1.1.2 mrg and corresponding sizes. */
1276 1.1.1.2 mrg int num_funcs = host_funcs_end - host_func_table;
1277 1.1.1.2 mrg int num_vars = (host_vars_end - host_var_table) / 2;
1278 1.1.1.2 mrg
1279 1.1.1.2 mrg struct splay_tree_key_s k;
1280 1.1.1.2 mrg splay_tree_key node = NULL;
1281 1.1.1.2 mrg
1282 1.1.1.2 mrg /* Find mapping at start of node array */
1283 1.1.1.2 mrg if (num_funcs || num_vars)
1284 1.1.1.2 mrg {
1285 1.1.1.2 mrg k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1286 1.1.1.2 mrg : (uintptr_t) host_var_table[0]);
1287 1.1.1.2 mrg k.host_end = k.host_start + 1;
1288 1.1.1.2 mrg node = splay_tree_lookup (&devicep->mem_map, &k);
1289 1.1.1.2 mrg }
1290 1.1.1.2 mrg
1291 1.1.1.3 mrg if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1292 1.1.1.3 mrg {
1293 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
1294 1.1.1.3 mrg gomp_fatal ("image unload fail");
1295 1.1.1.3 mrg }
1296 1.1.1.2 mrg
1297 1.1.1.2 mrg /* Remove mappings from splay tree. */
1298 1.1.1.2 mrg int i;
1299 1.1.1.2 mrg for (i = 0; i < num_funcs; i++)
1300 1.1.1.2 mrg {
1301 1.1.1.2 mrg k.host_start = (uintptr_t) host_func_table[i];
1302 1.1.1.2 mrg k.host_end = k.host_start + 1;
1303 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k);
1304 1.1.1.2 mrg }
1305 1.1.1.2 mrg
1306 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
1307 1.1.1.2 mrg "omp declare target link" variables. */
1308 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1309 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
1310 1.1.1.2 mrg bool is_tgt_unmapped = false;
1311 1.1.1.2 mrg
1312 1.1.1.2 mrg for (i = 0; i < num_vars; i++)
1313 1.1.1.2 mrg {
1314 1.1.1.2 mrg k.host_start = (uintptr_t) host_var_table[i * 2];
1315 1.1.1.2 mrg k.host_end
1316 1.1.1.2 mrg = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1317 1.1.1.2 mrg
1318 1.1.1.2 mrg if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1319 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k);
1320 1.1.1.2 mrg else
1321 1.1.1.2 mrg {
1322 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1323 1.1.1.7 mrg is_tgt_unmapped = gomp_remove_var (devicep, n);
1324 1.1.1.2 mrg }
1325 1.1.1.2 mrg }
1326 1.1.1.2 mrg
1327 1.1.1.2 mrg if (node && !is_tgt_unmapped)
1328 1.1.1.2 mrg {
1329 1.1.1.2 mrg free (node->tgt);
1330 1.1.1.2 mrg free (node);
1331 1.1.1.2 mrg }
1332 1.1.1.2 mrg }
1333 1.1.1.2 mrg
1334 1.1 mrg /* This function should be called from every offload image while loading.
1335 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1336 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
1337 1.1 mrg
1338 1.1 mrg void
1339 1.1.1.2 mrg GOMP_offload_register_ver (unsigned version, const void *host_table,
1340 1.1.1.2 mrg int target_type, const void *target_data)
1341 1.1 mrg {
1342 1.1 mrg int i;
1343 1.1.1.2 mrg
1344 1.1.1.2 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1345 1.1.1.2 mrg gomp_fatal ("Library too old for offload (version %u < %u)",
1346 1.1.1.2 mrg GOMP_VERSION, GOMP_VERSION_LIB (version));
1347 1.1.1.2 mrg
1348 1.1 mrg gomp_mutex_lock (®ister_lock);
1349 1.1 mrg
1350 1.1 mrg /* Load image to all initialized devices. */
1351 1.1 mrg for (i = 0; i < num_devices; i++)
1352 1.1 mrg {
1353 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
1354 1.1 mrg gomp_mutex_lock (&devicep->lock);
1355 1.1.1.2 mrg if (devicep->type == target_type
1356 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
1357 1.1.1.2 mrg gomp_load_image_to_device (devicep, version,
1358 1.1.1.2 mrg host_table, target_data, true);
1359 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1360 1.1 mrg }
1361 1.1 mrg
1362 1.1 mrg /* Insert image to array of pending images. */
1363 1.1 mrg offload_images
1364 1.1 mrg = gomp_realloc_unlock (offload_images,
1365 1.1 mrg (num_offload_images + 1)
1366 1.1 mrg * sizeof (struct offload_image_descr));
1367 1.1.1.2 mrg offload_images[num_offload_images].version = version;
1368 1.1 mrg offload_images[num_offload_images].type = target_type;
1369 1.1 mrg offload_images[num_offload_images].host_table = host_table;
1370 1.1 mrg offload_images[num_offload_images].target_data = target_data;
1371 1.1 mrg
1372 1.1 mrg num_offload_images++;
1373 1.1 mrg gomp_mutex_unlock (®ister_lock);
1374 1.1 mrg }
1375 1.1 mrg
1376 1.1.1.2 mrg void
1377 1.1.1.2 mrg GOMP_offload_register (const void *host_table, int target_type,
1378 1.1.1.2 mrg const void *target_data)
1379 1.1.1.2 mrg {
1380 1.1.1.2 mrg GOMP_offload_register_ver (0, host_table, target_type, target_data);
1381 1.1.1.2 mrg }
1382 1.1.1.2 mrg
1383 1.1 mrg /* This function should be called from every offload image while unloading.
1384 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1385 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
1386 1.1 mrg
1387 1.1 mrg void
1388 1.1.1.2 mrg GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1389 1.1.1.2 mrg int target_type, const void *target_data)
1390 1.1 mrg {
1391 1.1 mrg int i;
1392 1.1 mrg
1393 1.1 mrg gomp_mutex_lock (®ister_lock);
1394 1.1 mrg
1395 1.1 mrg /* Unload image from all initialized devices. */
1396 1.1 mrg for (i = 0; i < num_devices; i++)
1397 1.1 mrg {
1398 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
1399 1.1 mrg gomp_mutex_lock (&devicep->lock);
1400 1.1.1.2 mrg if (devicep->type == target_type
1401 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
1402 1.1.1.2 mrg gomp_unload_image_from_device (devicep, version,
1403 1.1.1.2 mrg host_table, target_data);
1404 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1405 1.1 mrg }
1406 1.1 mrg
1407 1.1 mrg /* Remove image from array of pending images. */
1408 1.1 mrg for (i = 0; i < num_offload_images; i++)
1409 1.1 mrg if (offload_images[i].target_data == target_data)
1410 1.1 mrg {
1411 1.1 mrg offload_images[i] = offload_images[--num_offload_images];
1412 1.1 mrg break;
1413 1.1 mrg }
1414 1.1 mrg
1415 1.1 mrg gomp_mutex_unlock (®ister_lock);
1416 1.1 mrg }
1417 1.1 mrg
1418 1.1.1.2 mrg void
1419 1.1.1.2 mrg GOMP_offload_unregister (const void *host_table, int target_type,
1420 1.1.1.2 mrg const void *target_data)
1421 1.1.1.2 mrg {
1422 1.1.1.2 mrg GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1423 1.1.1.2 mrg }
1424 1.1.1.2 mrg
1425 1.1 mrg /* This function initializes the target device, specified by DEVICEP. DEVICEP
1426 1.1 mrg must be locked on entry, and remains locked on return. */
1427 1.1 mrg
1428 1.1 mrg attribute_hidden void
1429 1.1 mrg gomp_init_device (struct gomp_device_descr *devicep)
1430 1.1 mrg {
1431 1.1 mrg int i;
1432 1.1.1.3 mrg if (!devicep->init_device_func (devicep->target_id))
1433 1.1.1.3 mrg {
1434 1.1.1.3 mrg gomp_mutex_unlock (&devicep->lock);
1435 1.1.1.3 mrg gomp_fatal ("device initialization failed");
1436 1.1.1.3 mrg }
1437 1.1 mrg
1438 1.1 mrg /* Load to device all images registered by the moment. */
1439 1.1 mrg for (i = 0; i < num_offload_images; i++)
1440 1.1 mrg {
1441 1.1 mrg struct offload_image_descr *image = &offload_images[i];
1442 1.1 mrg if (image->type == devicep->type)
1443 1.1.1.2 mrg gomp_load_image_to_device (devicep, image->version,
1444 1.1.1.2 mrg image->host_table, image->target_data,
1445 1.1.1.2 mrg false);
1446 1.1 mrg }
1447 1.1 mrg
1448 1.1.1.2 mrg devicep->state = GOMP_DEVICE_INITIALIZED;
1449 1.1.1.2 mrg }
1450 1.1.1.2 mrg
1451 1.1.1.2 mrg attribute_hidden void
1452 1.1.1.2 mrg gomp_unload_device (struct gomp_device_descr *devicep)
1453 1.1.1.2 mrg {
1454 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
1455 1.1.1.2 mrg {
1456 1.1.1.2 mrg unsigned i;
1457 1.1.1.2 mrg
1458 1.1.1.2 mrg /* Unload from device all images registered at the moment. */
1459 1.1.1.2 mrg for (i = 0; i < num_offload_images; i++)
1460 1.1.1.2 mrg {
1461 1.1.1.2 mrg struct offload_image_descr *image = &offload_images[i];
1462 1.1.1.2 mrg if (image->type == devicep->type)
1463 1.1.1.2 mrg gomp_unload_image_from_device (devicep, image->version,
1464 1.1.1.2 mrg image->host_table,
1465 1.1.1.2 mrg image->target_data);
1466 1.1.1.2 mrg }
1467 1.1.1.2 mrg }
1468 1.1 mrg }
1469 1.1 mrg
1470 1.1 mrg /* Free address mapping tables. MM must be locked on entry, and remains locked
1471 1.1 mrg on return. */
1472 1.1 mrg
1473 1.1 mrg attribute_hidden void
1474 1.1 mrg gomp_free_memmap (struct splay_tree_s *mem_map)
1475 1.1 mrg {
1476 1.1 mrg while (mem_map->root)
1477 1.1 mrg {
1478 1.1 mrg struct target_mem_desc *tgt = mem_map->root->key.tgt;
1479 1.1 mrg
1480 1.1 mrg splay_tree_remove (mem_map, &mem_map->root->key);
1481 1.1 mrg free (tgt->array);
1482 1.1 mrg free (tgt);
1483 1.1 mrg }
1484 1.1 mrg }
1485 1.1 mrg
1486 1.1.1.2 mrg /* Host fallback for GOMP_target{,_ext} routines. */
1487 1.1 mrg
1488 1.1.1.2 mrg static void
1489 1.1.1.2 mrg gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1490 1.1 mrg {
1491 1.1.1.2 mrg struct gomp_thread old_thr, *thr = gomp_thread ();
1492 1.1.1.2 mrg old_thr = *thr;
1493 1.1.1.2 mrg memset (thr, '\0', sizeof (*thr));
1494 1.1.1.2 mrg if (gomp_places_list)
1495 1.1.1.2 mrg {
1496 1.1.1.2 mrg thr->place = old_thr.place;
1497 1.1.1.2 mrg thr->ts.place_partition_len = gomp_places_list_len;
1498 1.1.1.2 mrg }
1499 1.1.1.2 mrg fn (hostaddrs);
1500 1.1.1.2 mrg gomp_free_thread (thr);
1501 1.1.1.2 mrg *thr = old_thr;
1502 1.1 mrg }
1503 1.1 mrg
1504 1.1.1.2 mrg /* Calculate alignment and size requirements of a private copy of data shared
1505 1.1.1.2 mrg as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1506 1.1.1.2 mrg
1507 1.1.1.2 mrg static inline void
1508 1.1.1.2 mrg calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1509 1.1.1.2 mrg unsigned short *kinds, size_t *tgt_align,
1510 1.1.1.2 mrg size_t *tgt_size)
1511 1.1.1.2 mrg {
1512 1.1.1.2 mrg size_t i;
1513 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
1514 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1515 1.1.1.2 mrg {
1516 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
1517 1.1.1.2 mrg if (*tgt_align < align)
1518 1.1.1.2 mrg *tgt_align = align;
1519 1.1.1.2 mrg *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1520 1.1.1.2 mrg *tgt_size += sizes[i];
1521 1.1.1.2 mrg }
1522 1.1.1.2 mrg }
1523 1.1.1.2 mrg
1524 1.1.1.2 mrg /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1525 1.1.1.2 mrg
1526 1.1.1.2 mrg static inline void
1527 1.1.1.2 mrg copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1528 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, size_t tgt_align,
1529 1.1.1.2 mrg size_t tgt_size)
1530 1.1.1.2 mrg {
1531 1.1.1.2 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1532 1.1.1.2 mrg if (al)
1533 1.1.1.2 mrg tgt += tgt_align - al;
1534 1.1.1.2 mrg tgt_size = 0;
1535 1.1.1.2 mrg size_t i;
1536 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
1537 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1538 1.1.1.2 mrg {
1539 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
1540 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1541 1.1.1.2 mrg memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1542 1.1.1.2 mrg hostaddrs[i] = tgt + tgt_size;
1543 1.1.1.2 mrg tgt_size = tgt_size + sizes[i];
1544 1.1.1.2 mrg }
1545 1.1.1.2 mrg }
1546 1.1.1.2 mrg
1547 1.1.1.2 mrg /* Helper function of GOMP_target{,_ext} routines. */
1548 1.1.1.2 mrg
1549 1.1.1.2 mrg static void *
1550 1.1.1.2 mrg gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1551 1.1.1.2 mrg void (*host_fn) (void *))
1552 1.1.1.2 mrg {
1553 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1554 1.1.1.2 mrg return (void *) host_fn;
1555 1.1.1.2 mrg else
1556 1.1.1.2 mrg {
1557 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
1558 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
1559 1.1.1.2 mrg {
1560 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1561 1.1.1.2 mrg return NULL;
1562 1.1.1.2 mrg }
1563 1.1.1.2 mrg
1564 1.1.1.2 mrg struct splay_tree_key_s k;
1565 1.1.1.2 mrg k.host_start = (uintptr_t) host_fn;
1566 1.1.1.2 mrg k.host_end = k.host_start + 1;
1567 1.1.1.2 mrg splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1568 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1569 1.1.1.2 mrg if (tgt_fn == NULL)
1570 1.1.1.2 mrg return NULL;
1571 1.1.1.2 mrg
1572 1.1.1.2 mrg return (void *) tgt_fn->tgt_offset;
1573 1.1.1.2 mrg }
1574 1.1.1.2 mrg }
1575 1.1.1.2 mrg
1576 1.1.1.2 mrg /* Called when encountering a target directive. If DEVICE
1577 1.1 mrg is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1578 1.1 mrg GOMP_DEVICE_HOST_FALLBACK (or any value
1579 1.1 mrg larger than last available hw device), use host fallback.
1580 1.1 mrg FN is address of host code, UNUSED is part of the current ABI, but
1581 1.1 mrg we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1582 1.1 mrg with MAPNUM entries, with addresses of the host objects,
1583 1.1 mrg sizes of the host objects (resp. for pointer kind pointer bias
1584 1.1 mrg and assumed sizeof (void *) size) and kinds. */
1585 1.1 mrg
1586 1.1 mrg void
1587 1.1 mrg GOMP_target (int device, void (*fn) (void *), const void *unused,
1588 1.1 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
1589 1.1 mrg unsigned char *kinds)
1590 1.1 mrg {
1591 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1592 1.1 mrg
1593 1.1.1.2 mrg void *fn_addr;
1594 1.1 mrg if (devicep == NULL
1595 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1596 1.1.1.2 mrg /* All shared memory devices should use the GOMP_target_ext function. */
1597 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1598 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1599 1.1.1.2 mrg return gomp_target_fallback (fn, hostaddrs);
1600 1.1.1.2 mrg
1601 1.1.1.2 mrg struct target_mem_desc *tgt_vars
1602 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1603 1.1.1.2 mrg GOMP_MAP_VARS_TARGET);
1604 1.1.1.2 mrg devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1605 1.1.1.2 mrg NULL);
1606 1.1.1.2 mrg gomp_unmap_vars (tgt_vars, true);
1607 1.1.1.2 mrg }
1608 1.1.1.2 mrg
1609 1.1.1.2 mrg /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1610 1.1.1.2 mrg and several arguments have been added:
1611 1.1.1.2 mrg FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1612 1.1.1.2 mrg DEPEND is array of dependencies, see GOMP_task for details.
1613 1.1.1.2 mrg
1614 1.1.1.2 mrg ARGS is a pointer to an array consisting of a variable number of both
1615 1.1.1.2 mrg device-independent and device-specific arguments, which can take one two
1616 1.1.1.2 mrg elements where the first specifies for which device it is intended, the type
1617 1.1.1.2 mrg and optionally also the value. If the value is not present in the first
1618 1.1.1.2 mrg one, the whole second element the actual value. The last element of the
1619 1.1.1.2 mrg array is a single NULL. Among the device independent can be for example
1620 1.1.1.2 mrg NUM_TEAMS and THREAD_LIMIT.
1621 1.1.1.2 mrg
1622 1.1.1.2 mrg NUM_TEAMS is positive if GOMP_teams will be called in the body with
1623 1.1.1.2 mrg that value, or 1 if teams construct is not present, or 0, if
1624 1.1.1.2 mrg teams construct does not have num_teams clause and so the choice is
1625 1.1.1.2 mrg implementation defined, and -1 if it can't be determined on the host
1626 1.1.1.2 mrg what value will GOMP_teams have on the device.
1627 1.1.1.2 mrg THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1628 1.1.1.2 mrg body with that value, or 0, if teams construct does not have thread_limit
1629 1.1.1.2 mrg clause or the teams construct is not present, or -1 if it can't be
1630 1.1.1.2 mrg determined on the host what value will GOMP_teams have on the device. */
1631 1.1.1.2 mrg
1632 1.1.1.2 mrg void
1633 1.1.1.2 mrg GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1634 1.1.1.2 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds,
1635 1.1.1.2 mrg unsigned int flags, void **depend, void **args)
1636 1.1.1.2 mrg {
1637 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
1638 1.1.1.2 mrg size_t tgt_align = 0, tgt_size = 0;
1639 1.1.1.2 mrg bool fpc_done = false;
1640 1.1.1.2 mrg
1641 1.1.1.2 mrg if (flags & GOMP_TARGET_FLAG_NOWAIT)
1642 1.1 mrg {
1643 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
1644 1.1.1.2 mrg /* Create a team if we don't have any around, as nowait
1645 1.1.1.2 mrg target tasks make sense to run asynchronously even when
1646 1.1.1.2 mrg outside of any parallel. */
1647 1.1.1.2 mrg if (__builtin_expect (thr->ts.team == NULL, 0))
1648 1.1.1.2 mrg {
1649 1.1.1.2 mrg struct gomp_team *team = gomp_new_team (1);
1650 1.1.1.2 mrg struct gomp_task *task = thr->task;
1651 1.1.1.2 mrg struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1652 1.1.1.2 mrg team->prev_ts = thr->ts;
1653 1.1.1.2 mrg thr->ts.team = team;
1654 1.1.1.2 mrg thr->ts.team_id = 0;
1655 1.1.1.2 mrg thr->ts.work_share = &team->work_shares[0];
1656 1.1.1.2 mrg thr->ts.last_work_share = NULL;
1657 1.1.1.2 mrg #ifdef HAVE_SYNC_BUILTINS
1658 1.1.1.2 mrg thr->ts.single_count = 0;
1659 1.1.1.2 mrg #endif
1660 1.1.1.2 mrg thr->ts.static_trip = 0;
1661 1.1.1.2 mrg thr->task = &team->implicit_task[0];
1662 1.1.1.2 mrg gomp_init_task (thr->task, NULL, icv);
1663 1.1.1.2 mrg if (task)
1664 1.1.1.2 mrg {
1665 1.1.1.2 mrg thr->task = task;
1666 1.1.1.2 mrg gomp_end_task ();
1667 1.1.1.2 mrg free (task);
1668 1.1.1.2 mrg thr->task = &team->implicit_task[0];
1669 1.1.1.2 mrg }
1670 1.1.1.2 mrg else
1671 1.1.1.2 mrg pthread_setspecific (gomp_thread_destructor, thr);
1672 1.1.1.2 mrg }
1673 1.1.1.2 mrg if (thr->ts.team
1674 1.1.1.2 mrg && !thr->task->final_task)
1675 1.1.1.2 mrg {
1676 1.1.1.2 mrg gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1677 1.1.1.2 mrg sizes, kinds, flags, depend, args,
1678 1.1.1.2 mrg GOMP_TARGET_TASK_BEFORE_MAP);
1679 1.1.1.2 mrg return;
1680 1.1.1.2 mrg }
1681 1.1 mrg }
1682 1.1 mrg
1683 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present
1684 1.1.1.2 mrg (or we are in a final task), block the parent task until the
1685 1.1.1.2 mrg dependencies are resolved and then just continue with the rest
1686 1.1.1.2 mrg of the function as if it is a merged task. */
1687 1.1.1.2 mrg if (depend != NULL)
1688 1.1.1.2 mrg {
1689 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
1690 1.1.1.2 mrg if (thr->task && thr->task->depend_hash)
1691 1.1.1.2 mrg {
1692 1.1.1.2 mrg /* If we might need to wait, copy firstprivate now. */
1693 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds,
1694 1.1.1.2 mrg &tgt_align, &tgt_size);
1695 1.1.1.2 mrg if (tgt_align)
1696 1.1.1.2 mrg {
1697 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1698 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1699 1.1.1.2 mrg tgt_align, tgt_size);
1700 1.1.1.2 mrg }
1701 1.1.1.2 mrg fpc_done = true;
1702 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend);
1703 1.1.1.2 mrg }
1704 1.1.1.2 mrg }
1705 1.1 mrg
1706 1.1 mrg void *fn_addr;
1707 1.1.1.2 mrg if (devicep == NULL
1708 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1709 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1710 1.1.1.2 mrg || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1711 1.1 mrg {
1712 1.1.1.2 mrg if (!fpc_done)
1713 1.1 mrg {
1714 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds,
1715 1.1.1.2 mrg &tgt_align, &tgt_size);
1716 1.1.1.2 mrg if (tgt_align)
1717 1.1.1.2 mrg {
1718 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1719 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1720 1.1.1.2 mrg tgt_align, tgt_size);
1721 1.1.1.2 mrg }
1722 1.1 mrg }
1723 1.1.1.2 mrg gomp_target_fallback (fn, hostaddrs);
1724 1.1.1.2 mrg return;
1725 1.1.1.2 mrg }
1726 1.1 mrg
1727 1.1.1.2 mrg struct target_mem_desc *tgt_vars;
1728 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1729 1.1.1.2 mrg {
1730 1.1.1.2 mrg if (!fpc_done)
1731 1.1.1.2 mrg {
1732 1.1.1.2 mrg calculate_firstprivate_requirements (mapnum, sizes, kinds,
1733 1.1.1.2 mrg &tgt_align, &tgt_size);
1734 1.1.1.2 mrg if (tgt_align)
1735 1.1.1.2 mrg {
1736 1.1.1.2 mrg char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1737 1.1.1.2 mrg copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1738 1.1.1.2 mrg tgt_align, tgt_size);
1739 1.1.1.2 mrg }
1740 1.1.1.2 mrg }
1741 1.1.1.2 mrg tgt_vars = NULL;
1742 1.1 mrg }
1743 1.1.1.2 mrg else
1744 1.1.1.2 mrg tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1745 1.1.1.2 mrg true, GOMP_MAP_VARS_TARGET);
1746 1.1.1.2 mrg devicep->run_func (devicep->target_id, fn_addr,
1747 1.1.1.2 mrg tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1748 1.1.1.2 mrg args);
1749 1.1.1.2 mrg if (tgt_vars)
1750 1.1.1.2 mrg gomp_unmap_vars (tgt_vars, true);
1751 1.1.1.2 mrg }
1752 1.1 mrg
1753 1.1.1.2 mrg /* Host fallback for GOMP_target_data{,_ext} routines. */
1754 1.1.1.2 mrg
1755 1.1.1.2 mrg static void
1756 1.1.1.2 mrg gomp_target_data_fallback (void)
1757 1.1.1.2 mrg {
1758 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (false);
1759 1.1.1.2 mrg if (icv->target_data)
1760 1.1 mrg {
1761 1.1.1.2 mrg /* Even when doing a host fallback, if there are any active
1762 1.1.1.2 mrg #pragma omp target data constructs, need to remember the
1763 1.1.1.2 mrg new #pragma omp target data, otherwise GOMP_target_end_data
1764 1.1.1.2 mrg would get out of sync. */
1765 1.1.1.2 mrg struct target_mem_desc *tgt
1766 1.1.1.2 mrg = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1767 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
1768 1.1.1.2 mrg tgt->prev = icv->target_data;
1769 1.1.1.2 mrg icv->target_data = tgt;
1770 1.1 mrg }
1771 1.1 mrg }
1772 1.1 mrg
1773 1.1 mrg void
1774 1.1 mrg GOMP_target_data (int device, const void *unused, size_t mapnum,
1775 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
1776 1.1 mrg {
1777 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1778 1.1 mrg
1779 1.1 mrg if (devicep == NULL
1780 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1781 1.1.1.2 mrg || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1782 1.1.1.2 mrg return gomp_target_data_fallback ();
1783 1.1 mrg
1784 1.1 mrg struct target_mem_desc *tgt
1785 1.1 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1786 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
1787 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (true);
1788 1.1.1.2 mrg tgt->prev = icv->target_data;
1789 1.1.1.2 mrg icv->target_data = tgt;
1790 1.1.1.2 mrg }
1791 1.1.1.2 mrg
1792 1.1.1.2 mrg void
1793 1.1.1.2 mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1794 1.1.1.2 mrg size_t *sizes, unsigned short *kinds)
1795 1.1.1.2 mrg {
1796 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
1797 1.1.1.2 mrg
1798 1.1.1.2 mrg if (devicep == NULL
1799 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1800 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1801 1.1.1.2 mrg return gomp_target_data_fallback ();
1802 1.1.1.2 mrg
1803 1.1.1.2 mrg struct target_mem_desc *tgt
1804 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1805 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
1806 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
1807 1.1 mrg tgt->prev = icv->target_data;
1808 1.1 mrg icv->target_data = tgt;
1809 1.1 mrg }
1810 1.1 mrg
1811 1.1 mrg void
1812 1.1 mrg GOMP_target_end_data (void)
1813 1.1 mrg {
1814 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
1815 1.1 mrg if (icv->target_data)
1816 1.1 mrg {
1817 1.1 mrg struct target_mem_desc *tgt = icv->target_data;
1818 1.1 mrg icv->target_data = tgt->prev;
1819 1.1 mrg gomp_unmap_vars (tgt, true);
1820 1.1 mrg }
1821 1.1 mrg }
1822 1.1 mrg
1823 1.1 mrg void
1824 1.1 mrg GOMP_target_update (int device, const void *unused, size_t mapnum,
1825 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
1826 1.1 mrg {
1827 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1828 1.1 mrg
1829 1.1 mrg if (devicep == NULL
1830 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1831 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1832 1.1.1.2 mrg return;
1833 1.1.1.2 mrg
1834 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1835 1.1.1.2 mrg }
1836 1.1.1.2 mrg
1837 1.1.1.2 mrg void
1838 1.1.1.2 mrg GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1839 1.1.1.2 mrg size_t *sizes, unsigned short *kinds,
1840 1.1.1.2 mrg unsigned int flags, void **depend)
1841 1.1.1.2 mrg {
1842 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
1843 1.1.1.2 mrg
1844 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present,
1845 1.1.1.2 mrg block the parent task until the dependencies are resolved
1846 1.1.1.2 mrg and then just continue with the rest of the function as if it
1847 1.1.1.2 mrg is a merged task. Until we are able to schedule task during
1848 1.1.1.2 mrg variable mapping or unmapping, ignore nowait if depend clauses
1849 1.1.1.2 mrg are not present. */
1850 1.1.1.2 mrg if (depend != NULL)
1851 1.1.1.2 mrg {
1852 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
1853 1.1.1.2 mrg if (thr->task && thr->task->depend_hash)
1854 1.1.1.2 mrg {
1855 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1856 1.1.1.2 mrg && thr->ts.team
1857 1.1.1.2 mrg && !thr->task->final_task)
1858 1.1.1.2 mrg {
1859 1.1.1.2 mrg if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1860 1.1.1.2 mrg mapnum, hostaddrs, sizes, kinds,
1861 1.1.1.2 mrg flags | GOMP_TARGET_FLAG_UPDATE,
1862 1.1.1.2 mrg depend, NULL, GOMP_TARGET_TASK_DATA))
1863 1.1.1.2 mrg return;
1864 1.1.1.2 mrg }
1865 1.1.1.2 mrg else
1866 1.1.1.2 mrg {
1867 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
1868 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new
1869 1.1.1.2 mrg tasks. */
1870 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
1871 1.1.1.7 mrg {
1872 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
1873 1.1.1.7 mrg return;
1874 1.1.1.7 mrg if (thr->task->taskgroup)
1875 1.1.1.7 mrg {
1876 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
1877 1.1.1.7 mrg return;
1878 1.1.1.7 mrg if (thr->task->taskgroup->workshare
1879 1.1.1.7 mrg && thr->task->taskgroup->prev
1880 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
1881 1.1.1.7 mrg return;
1882 1.1.1.7 mrg }
1883 1.1.1.7 mrg }
1884 1.1.1.2 mrg
1885 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend);
1886 1.1.1.2 mrg }
1887 1.1.1.2 mrg }
1888 1.1.1.2 mrg }
1889 1.1.1.2 mrg
1890 1.1.1.2 mrg if (devicep == NULL
1891 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1892 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1893 1.1.1.2 mrg return;
1894 1.1.1.2 mrg
1895 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
1896 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
1897 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1898 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
1899 1.1.1.7 mrg {
1900 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
1901 1.1.1.7 mrg return;
1902 1.1.1.7 mrg if (thr->task->taskgroup)
1903 1.1.1.7 mrg {
1904 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
1905 1.1.1.7 mrg return;
1906 1.1.1.7 mrg if (thr->task->taskgroup->workshare
1907 1.1.1.7 mrg && thr->task->taskgroup->prev
1908 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
1909 1.1.1.7 mrg return;
1910 1.1.1.7 mrg }
1911 1.1.1.7 mrg }
1912 1.1 mrg
1913 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1914 1.1.1.2 mrg }
1915 1.1.1.2 mrg
1916 1.1.1.2 mrg static void
1917 1.1.1.2 mrg gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1918 1.1.1.2 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds)
1919 1.1.1.2 mrg {
1920 1.1.1.2 mrg const int typemask = 0xff;
1921 1.1.1.2 mrg size_t i;
1922 1.1 mrg gomp_mutex_lock (&devicep->lock);
1923 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
1924 1.1.1.2 mrg {
1925 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1926 1.1.1.2 mrg return;
1927 1.1.1.2 mrg }
1928 1.1.1.2 mrg
1929 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
1930 1.1.1.2 mrg {
1931 1.1.1.2 mrg struct splay_tree_key_s cur_node;
1932 1.1.1.2 mrg unsigned char kind = kinds[i] & typemask;
1933 1.1.1.2 mrg switch (kind)
1934 1.1.1.2 mrg {
1935 1.1.1.2 mrg case GOMP_MAP_FROM:
1936 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
1937 1.1.1.2 mrg case GOMP_MAP_DELETE:
1938 1.1.1.2 mrg case GOMP_MAP_RELEASE:
1939 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1940 1.1.1.2 mrg case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1941 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
1942 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizes[i];
1943 1.1.1.2 mrg splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1944 1.1.1.2 mrg || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1945 1.1.1.2 mrg ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1946 1.1.1.2 mrg : splay_tree_lookup (&devicep->mem_map, &cur_node);
1947 1.1.1.2 mrg if (!k)
1948 1.1.1.2 mrg continue;
1949 1.1.1.2 mrg
1950 1.1.1.2 mrg if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1951 1.1.1.2 mrg k->refcount--;
1952 1.1.1.2 mrg if ((kind == GOMP_MAP_DELETE
1953 1.1.1.2 mrg || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1954 1.1.1.2 mrg && k->refcount != REFCOUNT_INFINITY)
1955 1.1.1.2 mrg k->refcount = 0;
1956 1.1.1.2 mrg
1957 1.1.1.2 mrg if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1958 1.1.1.2 mrg || kind == GOMP_MAP_ALWAYS_FROM)
1959 1.1.1.3 mrg gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1960 1.1.1.3 mrg (void *) (k->tgt->tgt_start + k->tgt_offset
1961 1.1.1.3 mrg + cur_node.host_start
1962 1.1.1.3 mrg - k->host_start),
1963 1.1.1.3 mrg cur_node.host_end - cur_node.host_start);
1964 1.1.1.2 mrg if (k->refcount == 0)
1965 1.1.1.2 mrg {
1966 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, k);
1967 1.1.1.2 mrg if (k->link_key)
1968 1.1.1.2 mrg splay_tree_insert (&devicep->mem_map,
1969 1.1.1.2 mrg (splay_tree_node) k->link_key);
1970 1.1.1.2 mrg if (k->tgt->refcount > 1)
1971 1.1.1.2 mrg k->tgt->refcount--;
1972 1.1.1.2 mrg else
1973 1.1.1.2 mrg gomp_unmap_tgt (k->tgt);
1974 1.1.1.2 mrg }
1975 1.1.1.2 mrg
1976 1.1.1.2 mrg break;
1977 1.1.1.2 mrg default:
1978 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1979 1.1.1.2 mrg gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1980 1.1.1.2 mrg kind);
1981 1.1.1.2 mrg }
1982 1.1.1.2 mrg }
1983 1.1.1.2 mrg
1984 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1985 1.1.1.2 mrg }
1986 1.1 mrg
1987 1.1.1.2 mrg void
1988 1.1.1.2 mrg GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1989 1.1.1.2 mrg size_t *sizes, unsigned short *kinds,
1990 1.1.1.2 mrg unsigned int flags, void **depend)
1991 1.1.1.2 mrg {
1992 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
1993 1.1.1.2 mrg
1994 1.1.1.2 mrg /* If there are depend clauses, but nowait is not present,
1995 1.1.1.2 mrg block the parent task until the dependencies are resolved
1996 1.1.1.2 mrg and then just continue with the rest of the function as if it
1997 1.1.1.2 mrg is a merged task. Until we are able to schedule task during
1998 1.1.1.2 mrg variable mapping or unmapping, ignore nowait if depend clauses
1999 1.1.1.2 mrg are not present. */
2000 1.1.1.2 mrg if (depend != NULL)
2001 1.1.1.2 mrg {
2002 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
2003 1.1.1.2 mrg if (thr->task && thr->task->depend_hash)
2004 1.1.1.2 mrg {
2005 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2006 1.1.1.2 mrg && thr->ts.team
2007 1.1.1.2 mrg && !thr->task->final_task)
2008 1.1.1.2 mrg {
2009 1.1.1.2 mrg if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2010 1.1.1.2 mrg mapnum, hostaddrs, sizes, kinds,
2011 1.1.1.2 mrg flags, depend, NULL,
2012 1.1.1.2 mrg GOMP_TARGET_TASK_DATA))
2013 1.1.1.2 mrg return;
2014 1.1.1.2 mrg }
2015 1.1.1.2 mrg else
2016 1.1.1.2 mrg {
2017 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
2018 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new
2019 1.1.1.2 mrg tasks. */
2020 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
2021 1.1.1.7 mrg {
2022 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
2023 1.1.1.7 mrg return;
2024 1.1.1.7 mrg if (thr->task->taskgroup)
2025 1.1.1.7 mrg {
2026 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
2027 1.1.1.7 mrg return;
2028 1.1.1.7 mrg if (thr->task->taskgroup->workshare
2029 1.1.1.7 mrg && thr->task->taskgroup->prev
2030 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
2031 1.1.1.7 mrg return;
2032 1.1.1.7 mrg }
2033 1.1.1.7 mrg }
2034 1.1.1.2 mrg
2035 1.1.1.2 mrg gomp_task_maybe_wait_for_dependencies (depend);
2036 1.1.1.2 mrg }
2037 1.1.1.2 mrg }
2038 1.1.1.2 mrg }
2039 1.1.1.2 mrg
2040 1.1.1.2 mrg if (devicep == NULL
2041 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2042 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2043 1.1.1.2 mrg return;
2044 1.1.1.2 mrg
2045 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
2046 1.1.1.2 mrg struct gomp_team *team = thr->ts.team;
2047 1.1.1.2 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2048 1.1.1.7 mrg if (__builtin_expect (gomp_cancel_var, 0) && team)
2049 1.1.1.7 mrg {
2050 1.1.1.7 mrg if (gomp_team_barrier_cancelled (&team->barrier))
2051 1.1.1.7 mrg return;
2052 1.1.1.7 mrg if (thr->task->taskgroup)
2053 1.1.1.7 mrg {
2054 1.1.1.7 mrg if (thr->task->taskgroup->cancelled)
2055 1.1.1.7 mrg return;
2056 1.1.1.7 mrg if (thr->task->taskgroup->workshare
2057 1.1.1.7 mrg && thr->task->taskgroup->prev
2058 1.1.1.7 mrg && thr->task->taskgroup->prev->cancelled)
2059 1.1.1.7 mrg return;
2060 1.1.1.7 mrg }
2061 1.1.1.7 mrg }
2062 1.1.1.2 mrg
2063 1.1.1.2 mrg size_t i;
2064 1.1.1.2 mrg if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2065 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
2066 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2067 1.1.1.2 mrg {
2068 1.1.1.2 mrg gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2069 1.1.1.2 mrg &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2070 1.1.1.2 mrg i += sizes[i];
2071 1.1.1.2 mrg }
2072 1.1.1.2 mrg else
2073 1.1.1.2 mrg gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2074 1.1.1.2 mrg true, GOMP_MAP_VARS_ENTER_DATA);
2075 1.1.1.2 mrg else
2076 1.1.1.2 mrg gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2077 1.1.1.2 mrg }
2078 1.1.1.2 mrg
2079 1.1.1.2 mrg bool
2080 1.1.1.2 mrg gomp_target_task_fn (void *data)
2081 1.1.1.2 mrg {
2082 1.1.1.2 mrg struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2083 1.1.1.2 mrg struct gomp_device_descr *devicep = ttask->devicep;
2084 1.1.1.2 mrg
2085 1.1.1.2 mrg if (ttask->fn != NULL)
2086 1.1.1.2 mrg {
2087 1.1.1.2 mrg void *fn_addr;
2088 1.1.1.2 mrg if (devicep == NULL
2089 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2090 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2091 1.1.1.2 mrg || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2092 1.1.1.2 mrg {
2093 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_FALLBACK;
2094 1.1.1.2 mrg gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2095 1.1.1.2 mrg return false;
2096 1.1.1.2 mrg }
2097 1.1.1.2 mrg
2098 1.1.1.2 mrg if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2099 1.1.1.2 mrg {
2100 1.1.1.2 mrg if (ttask->tgt)
2101 1.1.1.2 mrg gomp_unmap_vars (ttask->tgt, true);
2102 1.1.1.2 mrg return false;
2103 1.1.1.2 mrg }
2104 1.1.1.2 mrg
2105 1.1.1.2 mrg void *actual_arguments;
2106 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2107 1.1.1.2 mrg {
2108 1.1.1.2 mrg ttask->tgt = NULL;
2109 1.1.1.2 mrg actual_arguments = ttask->hostaddrs;
2110 1.1.1.2 mrg }
2111 1.1.1.2 mrg else
2112 1.1.1.2 mrg {
2113 1.1.1.2 mrg ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2114 1.1.1.2 mrg NULL, ttask->sizes, ttask->kinds, true,
2115 1.1.1.2 mrg GOMP_MAP_VARS_TARGET);
2116 1.1.1.2 mrg actual_arguments = (void *) ttask->tgt->tgt_start;
2117 1.1.1.2 mrg }
2118 1.1.1.2 mrg ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2119 1.1.1.2 mrg
2120 1.1.1.2 mrg devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2121 1.1.1.2 mrg ttask->args, (void *) ttask);
2122 1.1.1.2 mrg return true;
2123 1.1.1.2 mrg }
2124 1.1.1.2 mrg else if (devicep == NULL
2125 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2126 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2127 1.1.1.2 mrg return false;
2128 1.1.1.2 mrg
2129 1.1.1.2 mrg size_t i;
2130 1.1.1.2 mrg if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2131 1.1.1.2 mrg gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2132 1.1.1.2 mrg ttask->kinds, true);
2133 1.1.1.2 mrg else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2134 1.1.1.2 mrg for (i = 0; i < ttask->mapnum; i++)
2135 1.1.1.2 mrg if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2136 1.1.1.2 mrg {
2137 1.1.1.2 mrg gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2138 1.1.1.2 mrg NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2139 1.1.1.2 mrg GOMP_MAP_VARS_ENTER_DATA);
2140 1.1.1.2 mrg i += ttask->sizes[i];
2141 1.1.1.2 mrg }
2142 1.1.1.2 mrg else
2143 1.1.1.2 mrg gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2144 1.1.1.2 mrg &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2145 1.1.1.2 mrg else
2146 1.1.1.2 mrg gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2147 1.1.1.2 mrg ttask->kinds);
2148 1.1.1.2 mrg return false;
2149 1.1 mrg }
2150 1.1 mrg
2151 1.1 mrg void
2152 1.1 mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2153 1.1 mrg {
2154 1.1 mrg if (thread_limit)
2155 1.1 mrg {
2156 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
2157 1.1 mrg icv->thread_limit_var
2158 1.1 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2159 1.1 mrg }
2160 1.1 mrg (void) num_teams;
2161 1.1 mrg }
2162 1.1 mrg
2163 1.1.1.2 mrg void *
2164 1.1.1.2 mrg omp_target_alloc (size_t size, int device_num)
2165 1.1.1.2 mrg {
2166 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2167 1.1.1.2 mrg return malloc (size);
2168 1.1.1.2 mrg
2169 1.1.1.2 mrg if (device_num < 0)
2170 1.1.1.2 mrg return NULL;
2171 1.1.1.2 mrg
2172 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2173 1.1.1.2 mrg if (devicep == NULL)
2174 1.1.1.2 mrg return NULL;
2175 1.1.1.2 mrg
2176 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2177 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2178 1.1.1.2 mrg return malloc (size);
2179 1.1.1.2 mrg
2180 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2181 1.1.1.2 mrg void *ret = devicep->alloc_func (devicep->target_id, size);
2182 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2183 1.1.1.2 mrg return ret;
2184 1.1.1.2 mrg }
2185 1.1.1.2 mrg
2186 1.1.1.2 mrg void
2187 1.1.1.2 mrg omp_target_free (void *device_ptr, int device_num)
2188 1.1.1.2 mrg {
2189 1.1.1.2 mrg if (device_ptr == NULL)
2190 1.1.1.2 mrg return;
2191 1.1.1.2 mrg
2192 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2193 1.1.1.2 mrg {
2194 1.1.1.2 mrg free (device_ptr);
2195 1.1.1.2 mrg return;
2196 1.1.1.2 mrg }
2197 1.1.1.2 mrg
2198 1.1.1.2 mrg if (device_num < 0)
2199 1.1.1.2 mrg return;
2200 1.1.1.2 mrg
2201 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2202 1.1.1.2 mrg if (devicep == NULL)
2203 1.1.1.2 mrg return;
2204 1.1.1.2 mrg
2205 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2206 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2207 1.1.1.2 mrg {
2208 1.1.1.2 mrg free (device_ptr);
2209 1.1.1.2 mrg return;
2210 1.1.1.2 mrg }
2211 1.1.1.2 mrg
2212 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2213 1.1.1.3 mrg gomp_free_device_memory (devicep, device_ptr);
2214 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2215 1.1.1.2 mrg }
2216 1.1.1.2 mrg
2217 1.1.1.2 mrg int
2218 1.1.1.7 mrg omp_target_is_present (const void *ptr, int device_num)
2219 1.1.1.2 mrg {
2220 1.1.1.2 mrg if (ptr == NULL)
2221 1.1.1.2 mrg return 1;
2222 1.1.1.2 mrg
2223 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2224 1.1.1.2 mrg return 1;
2225 1.1.1.2 mrg
2226 1.1.1.2 mrg if (device_num < 0)
2227 1.1.1.2 mrg return 0;
2228 1.1.1.2 mrg
2229 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2230 1.1.1.2 mrg if (devicep == NULL)
2231 1.1.1.2 mrg return 0;
2232 1.1.1.2 mrg
2233 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2234 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2235 1.1.1.2 mrg return 1;
2236 1.1.1.2 mrg
2237 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2238 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
2239 1.1.1.2 mrg struct splay_tree_key_s cur_node;
2240 1.1.1.2 mrg
2241 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
2242 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
2243 1.1.1.2 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2244 1.1.1.2 mrg int ret = n != NULL;
2245 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2246 1.1.1.2 mrg return ret;
2247 1.1.1.2 mrg }
2248 1.1.1.2 mrg
2249 1.1.1.2 mrg int
2250 1.1.1.7 mrg omp_target_memcpy (void *dst, const void *src, size_t length,
2251 1.1.1.7 mrg size_t dst_offset, size_t src_offset, int dst_device_num,
2252 1.1.1.7 mrg int src_device_num)
2253 1.1.1.2 mrg {
2254 1.1.1.2 mrg struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2255 1.1.1.3 mrg bool ret;
2256 1.1.1.2 mrg
2257 1.1.1.2 mrg if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2258 1.1.1.2 mrg {
2259 1.1.1.2 mrg if (dst_device_num < 0)
2260 1.1.1.2 mrg return EINVAL;
2261 1.1.1.2 mrg
2262 1.1.1.2 mrg dst_devicep = resolve_device (dst_device_num);
2263 1.1.1.2 mrg if (dst_devicep == NULL)
2264 1.1.1.2 mrg return EINVAL;
2265 1.1.1.2 mrg
2266 1.1.1.2 mrg if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2267 1.1.1.2 mrg || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2268 1.1.1.2 mrg dst_devicep = NULL;
2269 1.1.1.2 mrg }
2270 1.1.1.2 mrg if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2271 1.1.1.2 mrg {
2272 1.1.1.2 mrg if (src_device_num < 0)
2273 1.1.1.2 mrg return EINVAL;
2274 1.1.1.2 mrg
2275 1.1.1.2 mrg src_devicep = resolve_device (src_device_num);
2276 1.1.1.2 mrg if (src_devicep == NULL)
2277 1.1.1.2 mrg return EINVAL;
2278 1.1.1.2 mrg
2279 1.1.1.2 mrg if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2280 1.1.1.2 mrg || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2281 1.1.1.2 mrg src_devicep = NULL;
2282 1.1.1.2 mrg }
2283 1.1.1.2 mrg if (src_devicep == NULL && dst_devicep == NULL)
2284 1.1.1.2 mrg {
2285 1.1.1.2 mrg memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2286 1.1.1.2 mrg return 0;
2287 1.1.1.2 mrg }
2288 1.1.1.2 mrg if (src_devicep == NULL)
2289 1.1.1.2 mrg {
2290 1.1.1.2 mrg gomp_mutex_lock (&dst_devicep->lock);
2291 1.1.1.3 mrg ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2292 1.1.1.3 mrg (char *) dst + dst_offset,
2293 1.1.1.3 mrg (char *) src + src_offset, length);
2294 1.1.1.2 mrg gomp_mutex_unlock (&dst_devicep->lock);
2295 1.1.1.3 mrg return (ret ? 0 : EINVAL);
2296 1.1.1.2 mrg }
2297 1.1.1.2 mrg if (dst_devicep == NULL)
2298 1.1.1.2 mrg {
2299 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
2300 1.1.1.3 mrg ret = src_devicep->dev2host_func (src_devicep->target_id,
2301 1.1.1.3 mrg (char *) dst + dst_offset,
2302 1.1.1.3 mrg (char *) src + src_offset, length);
2303 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
2304 1.1.1.3 mrg return (ret ? 0 : EINVAL);
2305 1.1.1.2 mrg }
2306 1.1.1.2 mrg if (src_devicep == dst_devicep)
2307 1.1.1.2 mrg {
2308 1.1.1.2 mrg gomp_mutex_lock (&src_devicep->lock);
2309 1.1.1.3 mrg ret = src_devicep->dev2dev_func (src_devicep->target_id,
2310 1.1.1.3 mrg (char *) dst + dst_offset,
2311 1.1.1.3 mrg (char *) src + src_offset, length);
2312 1.1.1.2 mrg gomp_mutex_unlock (&src_devicep->lock);
2313 1.1.1.3 mrg return (ret ? 0 : EINVAL);
2314 1.1.1.2 mrg }
2315 1.1.1.2 mrg return EINVAL;
2316 1.1.1.2 mrg }
2317 1.1.1.2 mrg
/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume of VOLUME[] elements (each ELEMENT_SIZE bytes) from
   SRC, offset by SRC_OFFSETS[] inside an array of SRC_DIMENSIONS[], to DST,
   offset by DST_OFFSETS[] inside an array of DST_DIMENSIONS[].  A NULL
   DST_DEVICEP/SRC_DEVICEP means the corresponding buffer is host memory.
   The caller (omp_target_memcpy_rect) already holds the device lock, if
   any.  Returns 0 on success, EINVAL on size_t overflow or an unsupported
   device combination.  */

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  /* Base case: one dimension is a single contiguous transfer.  */
  if (num_dims == 1)
    {
      /* Guard the byte-count computations against size_t overflow.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	/* Two distinct devices: unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Compute the byte stride of one slice along the outermost dimension,
     again rejecting overflow.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse once per slice of the outermost dimension.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
2393 1.1.1.2 mrg
/* OpenMP 4.5 omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume between host and/or device memory.  When both DST
   and SRC are NULL this is a query for the number of dimensions supported;
   the generic recursion in the worker handles any value, hence INT_MAX.
   Returns 0 on success, EINVAL on failure.  */

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  /* Query form: report maximum supported dimensionality.  */
  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory or non-OpenMP-capable devices are treated like
	 the host (NULL descriptor).  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  /* Copies between two distinct offloading devices are not supported.  */
  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  /* At most one real device is involved; hold its lock across the copy.  */
  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
2452 1.1.1.2 mrg
/* OpenMP 4.5 omp_target_associate_ptr: associate the host range
   [HOST_PTR, HOST_PTR + SIZE) with DEVICE_PTR + DEVICE_OFFSET on device
   DEVICE_NUM by entering a mapping into the device's splay tree.  The
   entry gets REFCOUNT_INFINITY so ordinary unmapping never removes it;
   only omp_target_disassociate_ptr does.  Returns 0 on success (including
   when an identical association already exists), EINVAL otherwise.  */

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  /* Associating host memory with the host itself is meaningless.  */
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  /* Only real (non-shared-memory) OpenMP offload devices keep a map.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An overlapping mapping exists; succeed only if it covers the
	 whole requested range and maps to the same device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Create a synthetic target_mem_desc that owns no device memory
	 (tgt_start == tgt_end == 0, to_free == NULL); disassociate_ptr
	 checks exactly these fields before removal.  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* Never removed by normal reference counting.  */
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2515 1.1.1.2 mrg
/* OpenMP 4.5 omp_target_disassociate_ptr: remove an association previously
   created by omp_target_associate_ptr.  Only entries shaped exactly like
   associate_ptr creates them (infinite refcount, no owned device memory,
   not on any mapping list) are removed.  Returns 0 on success, EINVAL
   otherwise.  */

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  /* Zero-length lookup; PTR must be the exact start of an association.  */
  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2557 1.1.1.2 mrg
2558 1.1.1.7 mrg int
2559 1.1.1.7 mrg omp_pause_resource (omp_pause_resource_t kind, int device_num)
2560 1.1.1.7 mrg {
2561 1.1.1.7 mrg (void) kind;
2562 1.1.1.7 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2563 1.1.1.7 mrg return gomp_pause_host ();
2564 1.1.1.7 mrg if (device_num < 0 || device_num >= gomp_get_num_devices ())
2565 1.1.1.7 mrg return -1;
2566 1.1.1.7 mrg /* Do nothing for target devices for now. */
2567 1.1.1.7 mrg return 0;
2568 1.1.1.7 mrg }
2569 1.1.1.7 mrg
2570 1.1.1.7 mrg int
2571 1.1.1.7 mrg omp_pause_resource_all (omp_pause_resource_t kind)
2572 1.1.1.7 mrg {
2573 1.1.1.7 mrg (void) kind;
2574 1.1.1.7 mrg if (gomp_pause_host ())
2575 1.1.1.7 mrg return -1;
2576 1.1.1.7 mrg /* Do nothing for target devices for now. */
2577 1.1.1.7 mrg return 0;
2578 1.1.1.7 mrg }
2579 1.1.1.7 mrg
/* Internal aliases of the entry points above, for calls from within the
   library itself (ialias is presumably defined in libgomp.h — verify).  */
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
2582 1.1.1.7 mrg
2583 1.1 mrg #ifdef PLUGIN_SUPPORT
2584 1.1 mrg
2585 1.1 mrg /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2586 1.1 mrg in PLUGIN_NAME.
2587 1.1 mrg The handles of the found functions are stored in the corresponding fields
2588 1.1 mrg of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2589 1.1 mrg
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* The plugin's ABI version must match this libgomp exactly.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Mandatory entry points for every plugin.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      /* OpenMP-capable plugins must also provide execution hooks.  */
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.register_async_cleanup,
			 openacc_register_async_cleanup)
	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
	  || !DLSYM_OPT (openacc.async_wait_all_async,
			 openacc_async_wait_all_async)
	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The four CUDA-specific hooks are all-or-nothing; count how many
	 the plugin provides.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
2693 1.1.1.2 mrg
2694 1.1.1.2 mrg /* This function finalizes all initialized devices. */
2695 1.1.1.2 mrg
2696 1.1.1.2 mrg static void
2697 1.1.1.2 mrg gomp_target_fini (void)
2698 1.1.1.2 mrg {
2699 1.1.1.2 mrg int i;
2700 1.1.1.2 mrg for (i = 0; i < num_devices; i++)
2701 1.1 mrg {
2702 1.1.1.3 mrg bool ret = true;
2703 1.1.1.2 mrg struct gomp_device_descr *devicep = &devices[i];
2704 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2705 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
2706 1.1.1.2 mrg {
2707 1.1.1.3 mrg ret = devicep->fini_device_func (devicep->target_id);
2708 1.1.1.2 mrg devicep->state = GOMP_DEVICE_FINALIZED;
2709 1.1.1.2 mrg }
2710 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2711 1.1.1.3 mrg if (!ret)
2712 1.1.1.3 mrg gomp_fatal ("device finalization failed");
2713 1.1 mrg }
2714 1.1 mrg }
2715 1.1 mrg
2716 1.1.1.7 mrg /* This function initializes the runtime for offloading.
2717 1.1.1.7 mrg It parses the list of offload plugins, and tries to load these.
2718 1.1.1.7 mrg On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2719 1.1 mrg will be set, and the array DEVICES initialized, containing descriptors for
2720 1.1 mrg corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2721 1.1 mrg by the others. */
2722 1.1 mrg
2723 1.1 mrg static void
2724 1.1 mrg gomp_target_init (void)
2725 1.1 mrg {
2726 1.1 mrg const char *prefix ="libgomp-plugin-";
2727 1.1 mrg const char *suffix = SONAME_SUFFIX (1);
2728 1.1 mrg const char *cur, *next;
2729 1.1 mrg char *plugin_name;
2730 1.1 mrg int i, new_num_devices;
2731 1.1 mrg
2732 1.1 mrg num_devices = 0;
2733 1.1 mrg devices = NULL;
2734 1.1 mrg
2735 1.1.1.7 mrg cur = OFFLOAD_PLUGINS;
2736 1.1 mrg if (*cur)
2737 1.1 mrg do
2738 1.1 mrg {
2739 1.1 mrg struct gomp_device_descr current_device;
2740 1.1.1.6 mrg size_t prefix_len, suffix_len, cur_len;
2741 1.1 mrg
2742 1.1 mrg next = strchr (cur, ',');
2743 1.1 mrg
2744 1.1.1.6 mrg prefix_len = strlen (prefix);
2745 1.1.1.6 mrg cur_len = next ? next - cur : strlen (cur);
2746 1.1.1.6 mrg suffix_len = strlen (suffix);
2747 1.1.1.6 mrg
2748 1.1.1.6 mrg plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2749 1.1 mrg if (!plugin_name)
2750 1.1 mrg {
2751 1.1 mrg num_devices = 0;
2752 1.1 mrg break;
2753 1.1 mrg }
2754 1.1 mrg
2755 1.1.1.6 mrg memcpy (plugin_name, prefix, prefix_len);
2756 1.1.1.6 mrg memcpy (plugin_name + prefix_len, cur, cur_len);
2757 1.1.1.6 mrg memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2758 1.1 mrg
2759 1.1 mrg if (gomp_load_plugin_for_device (¤t_device, plugin_name))
2760 1.1 mrg {
2761 1.1 mrg new_num_devices = current_device.get_num_devices_func ();
2762 1.1 mrg if (new_num_devices >= 1)
2763 1.1 mrg {
2764 1.1 mrg /* Augment DEVICES and NUM_DEVICES. */
2765 1.1 mrg
2766 1.1 mrg devices = realloc (devices, (num_devices + new_num_devices)
2767 1.1 mrg * sizeof (struct gomp_device_descr));
2768 1.1 mrg if (!devices)
2769 1.1 mrg {
2770 1.1 mrg num_devices = 0;
2771 1.1 mrg free (plugin_name);
2772 1.1 mrg break;
2773 1.1 mrg }
2774 1.1 mrg
2775 1.1 mrg current_device.name = current_device.get_name_func ();
2776 1.1 mrg /* current_device.capabilities has already been set. */
2777 1.1 mrg current_device.type = current_device.get_type_func ();
2778 1.1 mrg current_device.mem_map.root = NULL;
2779 1.1.1.2 mrg current_device.state = GOMP_DEVICE_UNINITIALIZED;
2780 1.1 mrg current_device.openacc.data_environ = NULL;
2781 1.1 mrg for (i = 0; i < new_num_devices; i++)
2782 1.1 mrg {
2783 1.1 mrg current_device.target_id = i;
2784 1.1 mrg devices[num_devices] = current_device;
2785 1.1 mrg gomp_mutex_init (&devices[num_devices].lock);
2786 1.1 mrg num_devices++;
2787 1.1 mrg }
2788 1.1 mrg }
2789 1.1 mrg }
2790 1.1 mrg
2791 1.1 mrg free (plugin_name);
2792 1.1 mrg cur = next + 1;
2793 1.1 mrg }
2794 1.1 mrg while (next);
2795 1.1 mrg
2796 1.1 mrg /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2797 1.1 mrg NUM_DEVICES_OPENMP. */
2798 1.1 mrg struct gomp_device_descr *devices_s
2799 1.1 mrg = malloc (num_devices * sizeof (struct gomp_device_descr));
2800 1.1 mrg if (!devices_s)
2801 1.1 mrg {
2802 1.1 mrg num_devices = 0;
2803 1.1 mrg free (devices);
2804 1.1 mrg devices = NULL;
2805 1.1 mrg }
2806 1.1 mrg num_devices_openmp = 0;
2807 1.1 mrg for (i = 0; i < num_devices; i++)
2808 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2809 1.1 mrg devices_s[num_devices_openmp++] = devices[i];
2810 1.1 mrg int num_devices_after_openmp = num_devices_openmp;
2811 1.1 mrg for (i = 0; i < num_devices; i++)
2812 1.1 mrg if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2813 1.1 mrg devices_s[num_devices_after_openmp++] = devices[i];
2814 1.1 mrg free (devices);
2815 1.1 mrg devices = devices_s;
2816 1.1 mrg
2817 1.1 mrg for (i = 0; i < num_devices; i++)
2818 1.1 mrg {
2819 1.1 mrg /* The 'devices' array can be moved (by the realloc call) until we have
2820 1.1 mrg found all the plugins, so registering with the OpenACC runtime (which
2821 1.1 mrg takes a copy of the pointer argument) must be delayed until now. */
2822 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2823 1.1 mrg goacc_register (&devices[i]);
2824 1.1 mrg }
2825 1.1.1.2 mrg
2826 1.1.1.2 mrg if (atexit (gomp_target_fini) != 0)
2827 1.1.1.2 mrg gomp_fatal ("atexit failed");
2828 1.1 mrg }
2829 1.1 mrg
2830 1.1 mrg #else /* PLUGIN_SUPPORT */
2831 1.1 mrg /* If dlfcn.h is unavailable we always fallback to host execution.
2832 1.1 mrg GOMP_target* routines are just stubs for this case. */
static void
gomp_target_init (void)
{
  /* No plugin support: there are never any offload devices.  */
}
2837 1.1 mrg #endif /* PLUGIN_SUPPORT */
2838