target.c revision 1.1.1.2.2.1 1 1.1.1.2.2.1 pgoyette /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2 1.1 mrg Contributed by Jakub Jelinek <jakub (at) redhat.com>.
3 1.1 mrg
4 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.1 mrg (libgomp).
6 1.1 mrg
7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1 mrg more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg /* This file contains the support of offloading. */
27 1.1 mrg
28 1.1 mrg #include "config.h"
29 1.1 mrg #include "libgomp.h"
30 1.1 mrg #include "oacc-plugin.h"
31 1.1 mrg #include "oacc-int.h"
32 1.1 mrg #include "gomp-constants.h"
33 1.1 mrg #include <limits.h>
34 1.1 mrg #include <stdbool.h>
35 1.1 mrg #include <stdlib.h>
36 1.1 mrg #ifdef HAVE_INTTYPES_H
37 1.1 mrg # include <inttypes.h> /* For PRIu64. */
38 1.1 mrg #endif
39 1.1 mrg #include <string.h>
40 1.1 mrg #include <assert.h>
41 1.1.1.2 mrg #include <errno.h>
42 1.1 mrg
43 1.1 mrg #ifdef PLUGIN_SUPPORT
44 1.1 mrg #include <dlfcn.h>
45 1.1 mrg #include "plugin-suffix.h"
46 1.1 mrg #endif
47 1.1 mrg
48 1.1 mrg static void gomp_target_init (void);
49 1.1 mrg
/* The whole initialization code for offloading plugins is only run once.  */
51 1.1 mrg static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 1.1 mrg
53 1.1 mrg /* Mutex for offload image registration. */
54 1.1 mrg static gomp_mutex_t register_lock;
55 1.1 mrg
/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host
   table descriptor, and a pointer to the target-side data.  */
struct offload_image_descr {
  unsigned version;		/* Plugin interface version of the image.  */
  enum offload_target_type type;
  const void *host_table;	/* Host table descriptor for the image.  */
  const void *target_data;	/* Opaque target-side data blob.  */
};
65 1.1 mrg
66 1.1 mrg /* Array of descriptors of offload images. */
67 1.1 mrg static struct offload_image_descr *offload_images;
68 1.1 mrg
69 1.1 mrg /* Total number of offload images. */
70 1.1 mrg static int num_offload_images;
71 1.1 mrg
72 1.1 mrg /* Array of descriptors for all available devices. */
73 1.1 mrg static struct gomp_device_descr *devices;
74 1.1 mrg
75 1.1 mrg /* Total number of available devices. */
76 1.1 mrg static int num_devices;
77 1.1 mrg
78 1.1 mrg /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79 1.1 mrg static int num_devices_openmp;
80 1.1 mrg
81 1.1 mrg /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82 1.1 mrg
83 1.1 mrg static void *
84 1.1 mrg gomp_realloc_unlock (void *old, size_t size)
85 1.1 mrg {
86 1.1 mrg void *ret = realloc (old, size);
87 1.1 mrg if (ret == NULL)
88 1.1 mrg {
89 1.1 mrg gomp_mutex_unlock (®ister_lock);
90 1.1 mrg gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91 1.1 mrg }
92 1.1 mrg return ret;
93 1.1 mrg }
94 1.1 mrg
/* Run the offloading initialization (gomp_target_init) exactly once,
   no matter how many threads call this.  */

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
100 1.1 mrg
/* Return the number of devices with GOMP_OFFLOAD_CAP_OPENMP_400
   capability, initializing the plugin machinery on first use.  */

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
107 1.1 mrg
108 1.1 mrg static struct gomp_device_descr *
109 1.1 mrg resolve_device (int device_id)
110 1.1 mrg {
111 1.1 mrg if (device_id == GOMP_DEVICE_ICV)
112 1.1 mrg {
113 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
114 1.1 mrg device_id = icv->default_device_var;
115 1.1 mrg }
116 1.1 mrg
117 1.1 mrg if (device_id < 0 || device_id >= gomp_get_num_devices ())
118 1.1 mrg return NULL;
119 1.1 mrg
120 1.1.1.2 mrg gomp_mutex_lock (&devices[device_id].lock);
121 1.1.1.2 mrg if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
122 1.1.1.2 mrg gomp_init_device (&devices[device_id]);
123 1.1.1.2 mrg else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
124 1.1.1.2 mrg {
125 1.1.1.2 mrg gomp_mutex_unlock (&devices[device_id].lock);
126 1.1.1.2 mrg return NULL;
127 1.1.1.2 mrg }
128 1.1.1.2 mrg gomp_mutex_unlock (&devices[device_id].lock);
129 1.1.1.2 mrg
130 1.1 mrg return &devices[device_id];
131 1.1 mrg }
132 1.1 mrg
133 1.1 mrg
/* Look KEY up in MEM_MAP.  A zero-length KEY (host_start == host_end)
   is probed with the range widened one byte to the right, then one byte
   to the left, before a final exact lookup; KEY is restored to its
   original bounds before every return.  */

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* Try [start, start+1): hits a mapping that begins at or covers
     this address.  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* Try [start-1, start): hits a mapping that ends at this address.  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  /* Last resort: exact zero-length lookup.  */
  return splay_tree_lookup (mem_map, key);
}
152 1.1.1.2 mrg
153 1.1.1.2 mrg static inline splay_tree_key
154 1.1.1.2 mrg gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155 1.1.1.2 mrg {
156 1.1.1.2 mrg if (key->host_start != key->host_end)
157 1.1.1.2 mrg return splay_tree_lookup (mem_map, key);
158 1.1.1.2 mrg
159 1.1.1.2 mrg key->host_end++;
160 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, key);
161 1.1.1.2 mrg key->host_end--;
162 1.1.1.2 mrg return n;
163 1.1.1.2 mrg }
164 1.1.1.2 mrg
165 1.1.1.2.2.1 pgoyette static inline void
166 1.1.1.2.2.1 pgoyette gomp_device_copy (struct gomp_device_descr *devicep,
167 1.1.1.2.2.1 pgoyette bool (*copy_func) (int, void *, const void *, size_t),
168 1.1.1.2.2.1 pgoyette const char *dst, void *dstaddr,
169 1.1.1.2.2.1 pgoyette const char *src, const void *srcaddr,
170 1.1.1.2.2.1 pgoyette size_t size)
171 1.1.1.2.2.1 pgoyette {
172 1.1.1.2.2.1 pgoyette if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
173 1.1.1.2.2.1 pgoyette {
174 1.1.1.2.2.1 pgoyette gomp_mutex_unlock (&devicep->lock);
175 1.1.1.2.2.1 pgoyette gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 1.1.1.2.2.1 pgoyette src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
177 1.1.1.2.2.1 pgoyette }
178 1.1.1.2.2.1 pgoyette }
179 1.1.1.2.2.1 pgoyette
/* Copy SZ bytes from host address H to device address D on DEVICEP,
   aborting (via gomp_device_copy) if the plugin reports failure.  */

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    void *d, const void *h, size_t sz)
{
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
186 1.1.1.2.2.1 pgoyette
/* Copy SZ bytes from device address D to host address H on DEVICEP,
   aborting (via gomp_device_copy) if the plugin reports failure.  */

static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
193 1.1.1.2.2.1 pgoyette
194 1.1.1.2.2.1 pgoyette static void
195 1.1.1.2.2.1 pgoyette gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
196 1.1.1.2.2.1 pgoyette {
197 1.1.1.2.2.1 pgoyette if (!devicep->free_func (devicep->target_id, devptr))
198 1.1.1.2.2.1 pgoyette {
199 1.1.1.2.2.1 pgoyette gomp_mutex_unlock (&devicep->lock);
200 1.1.1.2.2.1 pgoyette gomp_fatal ("error in freeing device memory block at %p", devptr);
201 1.1.1.2.2.1 pgoyette }
202 1.1.1.2.2.1 pgoyette }
203 1.1.1.2.2.1 pgoyette
204 1.1.1.2 mrg /* Handle the case where gomp_map_lookup, splay_tree_lookup or
205 1.1.1.2 mrg gomp_map_0len_lookup found oldn for newn.
206 1.1 mrg Helper function of gomp_map_vars. */
207 1.1 mrg
208 1.1 mrg static inline void
209 1.1 mrg gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
210 1.1.1.2 mrg splay_tree_key newn, struct target_var_desc *tgt_var,
211 1.1.1.2 mrg unsigned char kind)
212 1.1 mrg {
213 1.1.1.2 mrg tgt_var->key = oldn;
214 1.1.1.2 mrg tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
215 1.1.1.2 mrg tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
216 1.1.1.2 mrg tgt_var->offset = newn->host_start - oldn->host_start;
217 1.1.1.2 mrg tgt_var->length = newn->host_end - newn->host_start;
218 1.1.1.2 mrg
219 1.1 mrg if ((kind & GOMP_MAP_FLAG_FORCE)
220 1.1 mrg || oldn->host_start > newn->host_start
221 1.1 mrg || oldn->host_end < newn->host_end)
222 1.1 mrg {
223 1.1 mrg gomp_mutex_unlock (&devicep->lock);
224 1.1 mrg gomp_fatal ("Trying to map into device [%p..%p) object when "
225 1.1 mrg "[%p..%p) is already mapped",
226 1.1 mrg (void *) newn->host_start, (void *) newn->host_end,
227 1.1 mrg (void *) oldn->host_start, (void *) oldn->host_end);
228 1.1 mrg }
229 1.1.1.2 mrg
230 1.1.1.2 mrg if (GOMP_MAP_ALWAYS_TO_P (kind))
231 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep,
232 1.1.1.2.2.1 pgoyette (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
233 1.1.1.2.2.1 pgoyette + newn->host_start - oldn->host_start),
234 1.1.1.2.2.1 pgoyette (void *) newn->host_start,
235 1.1.1.2.2.1 pgoyette newn->host_end - newn->host_start);
236 1.1.1.2.2.1 pgoyette
237 1.1.1.2 mrg if (oldn->refcount != REFCOUNT_INFINITY)
238 1.1.1.2 mrg oldn->refcount++;
239 1.1 mrg }
240 1.1 mrg
/* Fetch the IDX-th map kind from KINDS, which is an array of unsigned
   short entries when SHORT_MAPKIND is true and of unsigned char entries
   otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
247 1.1.1.2 mrg
/* Initialize the device-side pointer slot at TGT->tgt_start +
   TARGET_OFFSET so it refers to the device copy of the object HOST_PTR
   points to.  BIAS is added to HOST_PTR before the lookup and
   subtracted from the resulting device address (array-section bases may
   point before the first mapped byte).  Dies if the pointed-to object
   is not mapped.  The device lock is held on entry (it is released on
   the fatal path).  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* A NULL host pointer maps to a NULL device pointer.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  /* Translate the (biased) host address into a device address using the
     mapping N that contains it.  */
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *));
}
287 1.1.1.2 mrg
/* Map the I-th entry of a GOMP_MAP_STRUCT group whose enclosing
   structure is already (at least partly) mapped as N.  FIRST is the
   index of the group's first element; HOSTADDRS/SIZES/KINDS describe
   the whole mapping set.  The element must land in the same target
   block as N at the matching device offset; otherwise this dies.  The
   device lock is held on entry (released on the fatal path).  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  /* GOMP_MAP_STRUCT only occurs with OpenMP 4.5-style short map kinds.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-length element: retry with the range nudged one byte to
	 the left, then one byte to the right, restoring CUR_NODE's
	 bounds after each probe.  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
347 1.1.1.2 mrg
348 1.1.1.2 mrg static inline uintptr_t
349 1.1.1.2 mrg gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
350 1.1.1.2 mrg {
351 1.1.1.2 mrg if (tgt->list[i].key != NULL)
352 1.1.1.2 mrg return tgt->list[i].key->tgt->tgt_start
353 1.1.1.2 mrg + tgt->list[i].key->tgt_offset
354 1.1.1.2 mrg + tgt->list[i].offset;
355 1.1.1.2 mrg if (tgt->list[i].offset == ~(uintptr_t) 0)
356 1.1.1.2 mrg return (uintptr_t) hostaddrs[i];
357 1.1.1.2 mrg if (tgt->list[i].offset == ~(uintptr_t) 1)
358 1.1.1.2 mrg return 0;
359 1.1.1.2 mrg if (tgt->list[i].offset == ~(uintptr_t) 2)
360 1.1.1.2 mrg return tgt->list[i + 1].key->tgt->tgt_start
361 1.1.1.2 mrg + tgt->list[i + 1].key->tgt_offset
362 1.1.1.2 mrg + tgt->list[i + 1].offset
363 1.1.1.2 mrg + (uintptr_t) hostaddrs[i]
364 1.1.1.2 mrg - (uintptr_t) hostaddrs[i + 1];
365 1.1.1.2 mrg return tgt->tgt_start + tgt->list[i].offset;
366 1.1 mrg }
367 1.1 mrg
368 1.1 mrg attribute_hidden struct target_mem_desc *
369 1.1 mrg gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
370 1.1 mrg void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
371 1.1.1.2 mrg bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
372 1.1 mrg {
373 1.1 mrg size_t i, tgt_align, tgt_size, not_found_cnt = 0;
374 1.1.1.2 mrg bool has_firstprivate = false;
375 1.1.1.2 mrg const int rshift = short_mapkind ? 8 : 3;
376 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
377 1.1 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
378 1.1 mrg struct splay_tree_key_s cur_node;
379 1.1 mrg struct target_mem_desc *tgt
380 1.1 mrg = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
381 1.1 mrg tgt->list_count = mapnum;
382 1.1.1.2 mrg tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
383 1.1 mrg tgt->device_descr = devicep;
384 1.1 mrg
385 1.1 mrg if (mapnum == 0)
386 1.1.1.2 mrg {
387 1.1.1.2 mrg tgt->tgt_start = 0;
388 1.1.1.2 mrg tgt->tgt_end = 0;
389 1.1.1.2 mrg return tgt;
390 1.1.1.2 mrg }
391 1.1 mrg
392 1.1 mrg tgt_align = sizeof (void *);
393 1.1 mrg tgt_size = 0;
394 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
395 1.1 mrg {
396 1.1 mrg size_t align = 4 * sizeof (void *);
397 1.1 mrg tgt_align = align;
398 1.1 mrg tgt_size = mapnum * sizeof (void *);
399 1.1 mrg }
400 1.1 mrg
401 1.1 mrg gomp_mutex_lock (&devicep->lock);
402 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
403 1.1.1.2 mrg {
404 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
405 1.1.1.2 mrg free (tgt);
406 1.1.1.2 mrg return NULL;
407 1.1.1.2 mrg }
408 1.1 mrg
409 1.1 mrg for (i = 0; i < mapnum; i++)
410 1.1 mrg {
411 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
412 1.1.1.2 mrg if (hostaddrs[i] == NULL
413 1.1.1.2 mrg || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
414 1.1.1.2 mrg {
415 1.1.1.2 mrg tgt->list[i].key = NULL;
416 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 0;
417 1.1.1.2 mrg continue;
418 1.1.1.2 mrg }
419 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
420 1.1.1.2 mrg {
421 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
422 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
423 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
424 1.1.1.2 mrg if (n == NULL)
425 1.1.1.2 mrg {
426 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
427 1.1.1.2 mrg gomp_fatal ("use_device_ptr pointer wasn't mapped");
428 1.1.1.2 mrg }
429 1.1.1.2 mrg cur_node.host_start -= n->host_start;
430 1.1.1.2 mrg hostaddrs[i]
431 1.1.1.2 mrg = (void *) (n->tgt->tgt_start + n->tgt_offset
432 1.1.1.2 mrg + cur_node.host_start);
433 1.1.1.2 mrg tgt->list[i].key = NULL;
434 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 0;
435 1.1.1.2 mrg continue;
436 1.1.1.2 mrg }
437 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_STRUCT)
438 1.1.1.2 mrg {
439 1.1.1.2 mrg size_t first = i + 1;
440 1.1.1.2 mrg size_t last = i + sizes[i];
441 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
442 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
443 1.1.1.2 mrg + sizes[last];
444 1.1.1.2 mrg tgt->list[i].key = NULL;
445 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 2;
446 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
447 1.1.1.2 mrg if (n == NULL)
448 1.1.1.2 mrg {
449 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
450 1.1.1.2 mrg if (tgt_align < align)
451 1.1.1.2 mrg tgt_align = align;
452 1.1.1.2 mrg tgt_size -= (uintptr_t) hostaddrs[first]
453 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
454 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
455 1.1.1.2 mrg tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
456 1.1.1.2 mrg not_found_cnt += last - i;
457 1.1.1.2 mrg for (i = first; i <= last; i++)
458 1.1.1.2 mrg tgt->list[i].key = NULL;
459 1.1.1.2 mrg i--;
460 1.1.1.2 mrg continue;
461 1.1.1.2 mrg }
462 1.1.1.2 mrg for (i = first; i <= last; i++)
463 1.1.1.2 mrg gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
464 1.1.1.2 mrg sizes, kinds);
465 1.1.1.2 mrg i--;
466 1.1.1.2 mrg continue;
467 1.1.1.2 mrg }
468 1.1.1.2 mrg else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
469 1.1 mrg {
470 1.1.1.2 mrg tgt->list[i].key = NULL;
471 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 1;
472 1.1.1.2 mrg has_firstprivate = true;
473 1.1 mrg continue;
474 1.1 mrg }
475 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
476 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
477 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
478 1.1 mrg else
479 1.1 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
480 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
481 1.1.1.2 mrg {
482 1.1.1.2 mrg tgt->list[i].key = NULL;
483 1.1.1.2 mrg
484 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
485 1.1.1.2 mrg if (tgt_align < align)
486 1.1.1.2 mrg tgt_align = align;
487 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
488 1.1.1.2 mrg tgt_size += cur_node.host_end - cur_node.host_start;
489 1.1.1.2 mrg has_firstprivate = true;
490 1.1.1.2 mrg continue;
491 1.1.1.2 mrg }
492 1.1.1.2 mrg splay_tree_key n;
493 1.1.1.2 mrg if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
494 1.1 mrg {
495 1.1.1.2 mrg n = gomp_map_0len_lookup (mem_map, &cur_node);
496 1.1.1.2 mrg if (!n)
497 1.1.1.2 mrg {
498 1.1.1.2 mrg tgt->list[i].key = NULL;
499 1.1.1.2 mrg tgt->list[i].offset = ~(uintptr_t) 1;
500 1.1.1.2 mrg continue;
501 1.1.1.2 mrg }
502 1.1 mrg }
503 1.1 mrg else
504 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
505 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
506 1.1.1.2 mrg gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
507 1.1.1.2 mrg kind & typemask);
508 1.1.1.2 mrg else
509 1.1 mrg {
510 1.1.1.2 mrg tgt->list[i].key = NULL;
511 1.1 mrg
512 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
513 1.1 mrg not_found_cnt++;
514 1.1 mrg if (tgt_align < align)
515 1.1 mrg tgt_align = align;
516 1.1 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
517 1.1 mrg tgt_size += cur_node.host_end - cur_node.host_start;
518 1.1 mrg if ((kind & typemask) == GOMP_MAP_TO_PSET)
519 1.1 mrg {
520 1.1 mrg size_t j;
521 1.1 mrg for (j = i + 1; j < mapnum; j++)
522 1.1.1.2 mrg if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
523 1.1 mrg & typemask))
524 1.1 mrg break;
525 1.1 mrg else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
526 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
527 1.1 mrg > cur_node.host_end))
528 1.1 mrg break;
529 1.1 mrg else
530 1.1 mrg {
531 1.1.1.2 mrg tgt->list[j].key = NULL;
532 1.1 mrg i++;
533 1.1 mrg }
534 1.1 mrg }
535 1.1 mrg }
536 1.1 mrg }
537 1.1 mrg
538 1.1 mrg if (devaddrs)
539 1.1 mrg {
540 1.1 mrg if (mapnum != 1)
541 1.1 mrg {
542 1.1 mrg gomp_mutex_unlock (&devicep->lock);
543 1.1 mrg gomp_fatal ("unexpected aggregation");
544 1.1 mrg }
545 1.1 mrg tgt->to_free = devaddrs[0];
546 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
547 1.1 mrg tgt->tgt_end = tgt->tgt_start + sizes[0];
548 1.1 mrg }
549 1.1.1.2 mrg else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
550 1.1 mrg {
551 1.1 mrg /* Allocate tgt_align aligned tgt_size block of memory. */
552 1.1 mrg /* FIXME: Perhaps change interface to allocate properly aligned
553 1.1 mrg memory. */
554 1.1 mrg tgt->to_free = devicep->alloc_func (devicep->target_id,
555 1.1 mrg tgt_size + tgt_align - 1);
556 1.1.1.2.2.1 pgoyette if (!tgt->to_free)
557 1.1.1.2.2.1 pgoyette {
558 1.1.1.2.2.1 pgoyette gomp_mutex_unlock (&devicep->lock);
559 1.1.1.2.2.1 pgoyette gomp_fatal ("device memory allocation fail");
560 1.1.1.2.2.1 pgoyette }
561 1.1.1.2.2.1 pgoyette
562 1.1 mrg tgt->tgt_start = (uintptr_t) tgt->to_free;
563 1.1 mrg tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
564 1.1 mrg tgt->tgt_end = tgt->tgt_start + tgt_size;
565 1.1 mrg }
566 1.1 mrg else
567 1.1 mrg {
568 1.1 mrg tgt->to_free = NULL;
569 1.1 mrg tgt->tgt_start = 0;
570 1.1 mrg tgt->tgt_end = 0;
571 1.1 mrg }
572 1.1 mrg
573 1.1 mrg tgt_size = 0;
574 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
575 1.1 mrg tgt_size = mapnum * sizeof (void *);
576 1.1 mrg
577 1.1 mrg tgt->array = NULL;
578 1.1.1.2 mrg if (not_found_cnt || has_firstprivate)
579 1.1 mrg {
580 1.1.1.2 mrg if (not_found_cnt)
581 1.1.1.2 mrg tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
582 1.1 mrg splay_tree_node array = tgt->array;
583 1.1.1.2 mrg size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
584 1.1.1.2 mrg uintptr_t field_tgt_base = 0;
585 1.1 mrg
586 1.1 mrg for (i = 0; i < mapnum; i++)
587 1.1.1.2 mrg if (tgt->list[i].key == NULL)
588 1.1 mrg {
589 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
590 1.1 mrg if (hostaddrs[i] == NULL)
591 1.1 mrg continue;
592 1.1.1.2 mrg switch (kind & typemask)
593 1.1.1.2 mrg {
594 1.1.1.2 mrg size_t align, len, first, last;
595 1.1.1.2 mrg splay_tree_key n;
596 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE:
597 1.1.1.2 mrg align = (size_t) 1 << (kind >> rshift);
598 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
599 1.1.1.2 mrg tgt->list[i].offset = tgt_size;
600 1.1.1.2 mrg len = sizes[i];
601 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep,
602 1.1.1.2.2.1 pgoyette (void *) (tgt->tgt_start + tgt_size),
603 1.1.1.2.2.1 pgoyette (void *) hostaddrs[i], len);
604 1.1.1.2 mrg tgt_size += len;
605 1.1.1.2 mrg continue;
606 1.1.1.2 mrg case GOMP_MAP_FIRSTPRIVATE_INT:
607 1.1.1.2 mrg case GOMP_MAP_USE_DEVICE_PTR:
608 1.1.1.2 mrg case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
609 1.1.1.2 mrg continue;
610 1.1.1.2 mrg case GOMP_MAP_STRUCT:
611 1.1.1.2 mrg first = i + 1;
612 1.1.1.2 mrg last = i + sizes[i];
613 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
614 1.1.1.2 mrg cur_node.host_end = (uintptr_t) hostaddrs[last]
615 1.1.1.2 mrg + sizes[last];
616 1.1.1.2 mrg if (tgt->list[first].key != NULL)
617 1.1.1.2 mrg continue;
618 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
619 1.1.1.2 mrg if (n == NULL)
620 1.1.1.2 mrg {
621 1.1.1.2 mrg size_t align = (size_t) 1 << (kind >> rshift);
622 1.1.1.2 mrg tgt_size -= (uintptr_t) hostaddrs[first]
623 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
624 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
625 1.1.1.2 mrg tgt_size += (uintptr_t) hostaddrs[first]
626 1.1.1.2 mrg - (uintptr_t) hostaddrs[i];
627 1.1.1.2 mrg field_tgt_base = (uintptr_t) hostaddrs[first];
628 1.1.1.2 mrg field_tgt_offset = tgt_size;
629 1.1.1.2 mrg field_tgt_clear = last;
630 1.1.1.2 mrg tgt_size += cur_node.host_end
631 1.1.1.2 mrg - (uintptr_t) hostaddrs[first];
632 1.1.1.2 mrg continue;
633 1.1.1.2 mrg }
634 1.1.1.2 mrg for (i = first; i <= last; i++)
635 1.1.1.2 mrg gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
636 1.1.1.2 mrg sizes, kinds);
637 1.1.1.2 mrg i--;
638 1.1.1.2 mrg continue;
639 1.1.1.2 mrg case GOMP_MAP_ALWAYS_POINTER:
640 1.1.1.2 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
641 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + sizeof (void *);
642 1.1.1.2 mrg n = splay_tree_lookup (mem_map, &cur_node);
643 1.1.1.2 mrg if (n == NULL
644 1.1.1.2 mrg || n->host_start > cur_node.host_start
645 1.1.1.2 mrg || n->host_end < cur_node.host_end)
646 1.1.1.2 mrg {
647 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
648 1.1.1.2 mrg gomp_fatal ("always pointer not mapped");
649 1.1.1.2 mrg }
650 1.1.1.2 mrg if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
651 1.1.1.2 mrg != GOMP_MAP_ALWAYS_POINTER)
652 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
653 1.1.1.2 mrg if (cur_node.tgt_offset)
654 1.1.1.2 mrg cur_node.tgt_offset -= sizes[i];
655 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep,
656 1.1.1.2.2.1 pgoyette (void *) (n->tgt->tgt_start
657 1.1.1.2.2.1 pgoyette + n->tgt_offset
658 1.1.1.2.2.1 pgoyette + cur_node.host_start
659 1.1.1.2.2.1 pgoyette - n->host_start),
660 1.1.1.2.2.1 pgoyette (void *) &cur_node.tgt_offset,
661 1.1.1.2.2.1 pgoyette sizeof (void *));
662 1.1.1.2 mrg cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
663 1.1.1.2 mrg + cur_node.host_start - n->host_start;
664 1.1.1.2 mrg continue;
665 1.1.1.2 mrg default:
666 1.1.1.2 mrg break;
667 1.1.1.2 mrg }
668 1.1 mrg splay_tree_key k = &array->key;
669 1.1 mrg k->host_start = (uintptr_t) hostaddrs[i];
670 1.1 mrg if (!GOMP_MAP_POINTER_P (kind & typemask))
671 1.1 mrg k->host_end = k->host_start + sizes[i];
672 1.1 mrg else
673 1.1 mrg k->host_end = k->host_start + sizeof (void *);
674 1.1 mrg splay_tree_key n = splay_tree_lookup (mem_map, k);
675 1.1.1.2 mrg if (n && n->refcount != REFCOUNT_LINK)
676 1.1.1.2 mrg gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
677 1.1.1.2 mrg kind & typemask);
678 1.1 mrg else
679 1.1 mrg {
680 1.1.1.2 mrg k->link_key = NULL;
681 1.1.1.2 mrg if (n && n->refcount == REFCOUNT_LINK)
682 1.1.1.2 mrg {
683 1.1.1.2 mrg /* Replace target address of the pointer with target address
684 1.1.1.2 mrg of mapped object in the splay tree. */
685 1.1.1.2 mrg splay_tree_remove (mem_map, n);
686 1.1.1.2 mrg k->link_key = n;
687 1.1.1.2 mrg }
688 1.1 mrg size_t align = (size_t) 1 << (kind >> rshift);
689 1.1.1.2 mrg tgt->list[i].key = k;
690 1.1 mrg k->tgt = tgt;
691 1.1.1.2 mrg if (field_tgt_clear != ~(size_t) 0)
692 1.1.1.2 mrg {
693 1.1.1.2 mrg k->tgt_offset = k->host_start - field_tgt_base
694 1.1.1.2 mrg + field_tgt_offset;
695 1.1.1.2 mrg if (i == field_tgt_clear)
696 1.1.1.2 mrg field_tgt_clear = ~(size_t) 0;
697 1.1.1.2 mrg }
698 1.1.1.2 mrg else
699 1.1.1.2 mrg {
700 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
701 1.1.1.2 mrg k->tgt_offset = tgt_size;
702 1.1.1.2 mrg tgt_size += k->host_end - k->host_start;
703 1.1.1.2 mrg }
704 1.1.1.2 mrg tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
705 1.1.1.2 mrg tgt->list[i].always_copy_from
706 1.1.1.2 mrg = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
707 1.1.1.2 mrg tgt->list[i].offset = 0;
708 1.1.1.2 mrg tgt->list[i].length = k->host_end - k->host_start;
709 1.1 mrg k->refcount = 1;
710 1.1 mrg tgt->refcount++;
711 1.1 mrg array->left = NULL;
712 1.1 mrg array->right = NULL;
713 1.1 mrg splay_tree_insert (mem_map, array);
714 1.1 mrg switch (kind & typemask)
715 1.1 mrg {
716 1.1 mrg case GOMP_MAP_ALLOC:
717 1.1 mrg case GOMP_MAP_FROM:
718 1.1 mrg case GOMP_MAP_FORCE_ALLOC:
719 1.1 mrg case GOMP_MAP_FORCE_FROM:
720 1.1.1.2 mrg case GOMP_MAP_ALWAYS_FROM:
721 1.1 mrg break;
722 1.1 mrg case GOMP_MAP_TO:
723 1.1 mrg case GOMP_MAP_TOFROM:
724 1.1 mrg case GOMP_MAP_FORCE_TO:
725 1.1 mrg case GOMP_MAP_FORCE_TOFROM:
726 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TO:
727 1.1.1.2 mrg case GOMP_MAP_ALWAYS_TOFROM:
728 1.1 mrg /* FIXME: Perhaps add some smarts, like if copying
729 1.1 mrg several adjacent fields from host to target, use some
730 1.1 mrg host buffer to avoid sending each var individually. */
731 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep,
732 1.1.1.2.2.1 pgoyette (void *) (tgt->tgt_start
733 1.1.1.2.2.1 pgoyette + k->tgt_offset),
734 1.1.1.2.2.1 pgoyette (void *) k->host_start,
735 1.1.1.2.2.1 pgoyette k->host_end - k->host_start);
736 1.1 mrg break;
737 1.1 mrg case GOMP_MAP_POINTER:
738 1.1.1.2 mrg gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
739 1.1.1.2 mrg k->tgt_offset, sizes[i]);
740 1.1 mrg break;
741 1.1 mrg case GOMP_MAP_TO_PSET:
742 1.1 mrg /* FIXME: see above FIXME comment. */
743 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep,
744 1.1.1.2.2.1 pgoyette (void *) (tgt->tgt_start
745 1.1.1.2.2.1 pgoyette + k->tgt_offset),
746 1.1.1.2.2.1 pgoyette (void *) k->host_start,
747 1.1.1.2.2.1 pgoyette k->host_end - k->host_start);
748 1.1 mrg
749 1.1 mrg for (j = i + 1; j < mapnum; j++)
750 1.1.1.2 mrg if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
751 1.1.1.2 mrg j)
752 1.1 mrg & typemask))
753 1.1 mrg break;
754 1.1 mrg else if ((uintptr_t) hostaddrs[j] < k->host_start
755 1.1 mrg || ((uintptr_t) hostaddrs[j] + sizeof (void *)
756 1.1 mrg > k->host_end))
757 1.1 mrg break;
758 1.1 mrg else
759 1.1 mrg {
760 1.1.1.2 mrg tgt->list[j].key = k;
761 1.1.1.2 mrg tgt->list[j].copy_from = false;
762 1.1.1.2 mrg tgt->list[j].always_copy_from = false;
763 1.1.1.2 mrg if (k->refcount != REFCOUNT_INFINITY)
764 1.1.1.2 mrg k->refcount++;
765 1.1.1.2 mrg gomp_map_pointer (tgt,
766 1.1.1.2 mrg (uintptr_t) *(void **) hostaddrs[j],
767 1.1.1.2 mrg k->tgt_offset
768 1.1.1.2 mrg + ((uintptr_t) hostaddrs[j]
769 1.1.1.2 mrg - k->host_start),
770 1.1.1.2 mrg sizes[j]);
771 1.1 mrg i++;
772 1.1 mrg }
773 1.1 mrg break;
774 1.1 mrg case GOMP_MAP_FORCE_PRESENT:
775 1.1 mrg {
776 1.1 mrg /* We already looked up the memory region above and it
777 1.1 mrg was missing. */
778 1.1 mrg size_t size = k->host_end - k->host_start;
779 1.1 mrg gomp_mutex_unlock (&devicep->lock);
780 1.1 mrg #ifdef HAVE_INTTYPES_H
781 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
782 1.1 mrg "%"PRIu64" (0x%"PRIx64"))",
783 1.1 mrg (void *) k->host_start,
784 1.1 mrg (uint64_t) size, (uint64_t) size);
785 1.1 mrg #else
786 1.1 mrg gomp_fatal ("present clause: !acc_is_present (%p, "
787 1.1 mrg "%lu (0x%lx))", (void *) k->host_start,
788 1.1 mrg (unsigned long) size, (unsigned long) size);
789 1.1 mrg #endif
790 1.1 mrg }
791 1.1 mrg break;
792 1.1 mrg case GOMP_MAP_FORCE_DEVICEPTR:
793 1.1 mrg assert (k->host_end - k->host_start == sizeof (void *));
794 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep,
795 1.1.1.2.2.1 pgoyette (void *) (tgt->tgt_start
796 1.1.1.2.2.1 pgoyette + k->tgt_offset),
797 1.1.1.2.2.1 pgoyette (void *) k->host_start,
798 1.1.1.2.2.1 pgoyette sizeof (void *));
799 1.1 mrg break;
800 1.1 mrg default:
801 1.1 mrg gomp_mutex_unlock (&devicep->lock);
802 1.1 mrg gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
803 1.1 mrg kind);
804 1.1 mrg }
805 1.1.1.2 mrg
806 1.1.1.2 mrg if (k->link_key)
807 1.1.1.2 mrg {
808 1.1.1.2 mrg /* Set link pointer on target to the device address of the
809 1.1.1.2 mrg mapped object. */
810 1.1.1.2 mrg void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
811 1.1.1.2 mrg devicep->host2dev_func (devicep->target_id,
812 1.1.1.2 mrg (void *) n->tgt_offset,
813 1.1.1.2 mrg &tgt_addr, sizeof (void *));
814 1.1.1.2 mrg }
815 1.1 mrg array++;
816 1.1 mrg }
817 1.1 mrg }
818 1.1 mrg }
819 1.1 mrg
820 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_TARGET)
821 1.1 mrg {
822 1.1 mrg for (i = 0; i < mapnum; i++)
823 1.1 mrg {
824 1.1.1.2 mrg cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
825 1.1 mrg /* FIXME: see above FIXME comment. */
826 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep,
827 1.1.1.2.2.1 pgoyette (void *) (tgt->tgt_start + i * sizeof (void *)),
828 1.1.1.2.2.1 pgoyette (void *) &cur_node.tgt_offset, sizeof (void *));
829 1.1 mrg }
830 1.1 mrg }
831 1.1 mrg
832 1.1.1.2 mrg /* If the variable from "omp target enter data" map-list was already mapped,
833 1.1.1.2 mrg tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
834 1.1.1.2 mrg gomp_exit_data. */
835 1.1.1.2 mrg if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
836 1.1.1.2 mrg {
837 1.1.1.2 mrg free (tgt);
838 1.1.1.2 mrg tgt = NULL;
839 1.1.1.2 mrg }
840 1.1.1.2 mrg
841 1.1 mrg gomp_mutex_unlock (&devicep->lock);
842 1.1 mrg return tgt;
843 1.1 mrg }
844 1.1 mrg
845 1.1 mrg static void
846 1.1 mrg gomp_unmap_tgt (struct target_mem_desc *tgt)
847 1.1 mrg {
848 1.1 mrg /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
849 1.1 mrg if (tgt->tgt_end)
850 1.1.1.2.2.1 pgoyette gomp_free_device_memory (tgt->device_descr, tgt->to_free);
851 1.1 mrg
852 1.1 mrg free (tgt->array);
853 1.1 mrg free (tgt);
854 1.1 mrg }
855 1.1 mrg
856 1.1 mrg /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
857 1.1 mrg variables back from device to host: if it is false, it is assumed that this
858 1.1.1.2.2.1 pgoyette has been done already. */
859 1.1 mrg
860 1.1 mrg attribute_hidden void
861 1.1 mrg gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
862 1.1 mrg {
863 1.1 mrg struct gomp_device_descr *devicep = tgt->device_descr;
864 1.1 mrg
865 1.1 mrg if (tgt->list_count == 0)
866 1.1 mrg {
867 1.1 mrg free (tgt);
868 1.1 mrg return;
869 1.1 mrg }
870 1.1 mrg
871 1.1 mrg gomp_mutex_lock (&devicep->lock);
872 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
873 1.1.1.2 mrg {
874 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
875 1.1.1.2 mrg free (tgt->array);
876 1.1.1.2 mrg free (tgt);
877 1.1.1.2 mrg return;
878 1.1.1.2 mrg }
879 1.1 mrg
880 1.1 mrg size_t i;
881 1.1 mrg for (i = 0; i < tgt->list_count; i++)
882 1.1.1.2 mrg {
883 1.1.1.2 mrg splay_tree_key k = tgt->list[i].key;
884 1.1.1.2 mrg if (k == NULL)
885 1.1.1.2 mrg continue;
886 1.1.1.2 mrg
887 1.1.1.2 mrg bool do_unmap = false;
888 1.1.1.2 mrg if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
889 1.1.1.2 mrg k->refcount--;
890 1.1.1.2 mrg else if (k->refcount == 1)
891 1.1.1.2 mrg {
892 1.1.1.2.2.1 pgoyette k->refcount--;
893 1.1.1.2.2.1 pgoyette do_unmap = true;
894 1.1.1.2 mrg }
895 1.1.1.2 mrg
896 1.1.1.2 mrg if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
897 1.1.1.2 mrg || tgt->list[i].always_copy_from)
898 1.1.1.2.2.1 pgoyette gomp_copy_dev2host (devicep,
899 1.1.1.2.2.1 pgoyette (void *) (k->host_start + tgt->list[i].offset),
900 1.1.1.2.2.1 pgoyette (void *) (k->tgt->tgt_start + k->tgt_offset
901 1.1.1.2.2.1 pgoyette + tgt->list[i].offset),
902 1.1.1.2.2.1 pgoyette tgt->list[i].length);
903 1.1.1.2 mrg if (do_unmap)
904 1.1.1.2 mrg {
905 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, k);
906 1.1.1.2 mrg if (k->link_key)
907 1.1.1.2 mrg splay_tree_insert (&devicep->mem_map,
908 1.1.1.2 mrg (splay_tree_node) k->link_key);
909 1.1.1.2 mrg if (k->tgt->refcount > 1)
910 1.1.1.2 mrg k->tgt->refcount--;
911 1.1.1.2 mrg else
912 1.1.1.2 mrg gomp_unmap_tgt (k->tgt);
913 1.1.1.2 mrg }
914 1.1.1.2 mrg }
915 1.1 mrg
916 1.1 mrg if (tgt->refcount > 1)
917 1.1 mrg tgt->refcount--;
918 1.1 mrg else
919 1.1 mrg gomp_unmap_tgt (tgt);
920 1.1 mrg
921 1.1 mrg gomp_mutex_unlock (&devicep->lock);
922 1.1 mrg }
923 1.1 mrg
924 1.1 mrg static void
925 1.1 mrg gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
926 1.1.1.2 mrg size_t *sizes, void *kinds, bool short_mapkind)
927 1.1 mrg {
928 1.1 mrg size_t i;
929 1.1 mrg struct splay_tree_key_s cur_node;
930 1.1.1.2 mrg const int typemask = short_mapkind ? 0xff : 0x7;
931 1.1 mrg
932 1.1 mrg if (!devicep)
933 1.1 mrg return;
934 1.1 mrg
935 1.1 mrg if (mapnum == 0)
936 1.1 mrg return;
937 1.1 mrg
938 1.1 mrg gomp_mutex_lock (&devicep->lock);
939 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
940 1.1.1.2 mrg {
941 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
942 1.1.1.2 mrg return;
943 1.1.1.2 mrg }
944 1.1.1.2 mrg
945 1.1 mrg for (i = 0; i < mapnum; i++)
946 1.1 mrg if (sizes[i])
947 1.1 mrg {
948 1.1 mrg cur_node.host_start = (uintptr_t) hostaddrs[i];
949 1.1 mrg cur_node.host_end = cur_node.host_start + sizes[i];
950 1.1 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
951 1.1 mrg if (n)
952 1.1 mrg {
953 1.1.1.2 mrg int kind = get_kind (short_mapkind, kinds, i);
954 1.1 mrg if (n->host_start > cur_node.host_start
955 1.1 mrg || n->host_end < cur_node.host_end)
956 1.1 mrg {
957 1.1 mrg gomp_mutex_unlock (&devicep->lock);
958 1.1 mrg gomp_fatal ("Trying to update [%p..%p) object when "
959 1.1 mrg "only [%p..%p) is mapped",
960 1.1 mrg (void *) cur_node.host_start,
961 1.1 mrg (void *) cur_node.host_end,
962 1.1 mrg (void *) n->host_start,
963 1.1 mrg (void *) n->host_end);
964 1.1 mrg }
965 1.1.1.2.2.1 pgoyette
966 1.1.1.2.2.1 pgoyette
967 1.1.1.2.2.1 pgoyette void *hostaddr = (void *) cur_node.host_start;
968 1.1.1.2.2.1 pgoyette void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
969 1.1.1.2.2.1 pgoyette + cur_node.host_start - n->host_start);
970 1.1.1.2.2.1 pgoyette size_t size = cur_node.host_end - cur_node.host_start;
971 1.1.1.2.2.1 pgoyette
972 1.1 mrg if (GOMP_MAP_COPY_TO_P (kind & typemask))
973 1.1.1.2.2.1 pgoyette gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
974 1.1 mrg if (GOMP_MAP_COPY_FROM_P (kind & typemask))
975 1.1.1.2.2.1 pgoyette gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
976 1.1 mrg }
977 1.1 mrg }
978 1.1 mrg gomp_mutex_unlock (&devicep->lock);
979 1.1 mrg }
980 1.1 mrg
981 1.1 mrg /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
982 1.1 mrg And insert to splay tree the mapping between addresses from HOST_TABLE and
983 1.1.1.2 mrg from loaded target image. We rely in the host and device compiler
984 1.1.1.2 mrg emitting variable and functions in the same order. */
985 1.1 mrg
986 1.1 mrg static void
987 1.1.1.2 mrg gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
988 1.1.1.2 mrg const void *host_table, const void *target_data,
989 1.1.1.2 mrg bool is_register_lock)
990 1.1 mrg {
991 1.1 mrg void **host_func_table = ((void ***) host_table)[0];
992 1.1 mrg void **host_funcs_end = ((void ***) host_table)[1];
993 1.1 mrg void **host_var_table = ((void ***) host_table)[2];
994 1.1 mrg void **host_vars_end = ((void ***) host_table)[3];
995 1.1 mrg
996 1.1 mrg /* The func table contains only addresses, the var table contains addresses
997 1.1 mrg and corresponding sizes. */
998 1.1 mrg int num_funcs = host_funcs_end - host_func_table;
999 1.1 mrg int num_vars = (host_vars_end - host_var_table) / 2;
1000 1.1 mrg
1001 1.1 mrg /* Load image to device and get target addresses for the image. */
1002 1.1 mrg struct addr_pair *target_table = NULL;
1003 1.1.1.2 mrg int i, num_target_entries;
1004 1.1.1.2 mrg
1005 1.1.1.2 mrg num_target_entries
1006 1.1.1.2 mrg = devicep->load_image_func (devicep->target_id, version,
1007 1.1.1.2 mrg target_data, &target_table);
1008 1.1 mrg
1009 1.1 mrg if (num_target_entries != num_funcs + num_vars)
1010 1.1 mrg {
1011 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1012 1.1 mrg if (is_register_lock)
1013 1.1 mrg gomp_mutex_unlock (®ister_lock);
1014 1.1.1.2 mrg gomp_fatal ("Cannot map target functions or variables"
1015 1.1.1.2 mrg " (expected %u, have %u)", num_funcs + num_vars,
1016 1.1.1.2 mrg num_target_entries);
1017 1.1 mrg }
1018 1.1 mrg
1019 1.1 mrg /* Insert host-target address mapping into splay tree. */
1020 1.1 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1021 1.1 mrg tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1022 1.1.1.2 mrg tgt->refcount = REFCOUNT_INFINITY;
1023 1.1 mrg tgt->tgt_start = 0;
1024 1.1 mrg tgt->tgt_end = 0;
1025 1.1 mrg tgt->to_free = NULL;
1026 1.1 mrg tgt->prev = NULL;
1027 1.1 mrg tgt->list_count = 0;
1028 1.1 mrg tgt->device_descr = devicep;
1029 1.1 mrg splay_tree_node array = tgt->array;
1030 1.1 mrg
1031 1.1 mrg for (i = 0; i < num_funcs; i++)
1032 1.1 mrg {
1033 1.1 mrg splay_tree_key k = &array->key;
1034 1.1 mrg k->host_start = (uintptr_t) host_func_table[i];
1035 1.1 mrg k->host_end = k->host_start + 1;
1036 1.1 mrg k->tgt = tgt;
1037 1.1 mrg k->tgt_offset = target_table[i].start;
1038 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
1039 1.1.1.2 mrg k->link_key = NULL;
1040 1.1 mrg array->left = NULL;
1041 1.1 mrg array->right = NULL;
1042 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
1043 1.1 mrg array++;
1044 1.1 mrg }
1045 1.1 mrg
1046 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
1047 1.1.1.2 mrg "omp declare target link" variables. */
1048 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1049 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
1050 1.1.1.2 mrg
1051 1.1 mrg for (i = 0; i < num_vars; i++)
1052 1.1 mrg {
1053 1.1 mrg struct addr_pair *target_var = &target_table[num_funcs + i];
1054 1.1.1.2 mrg uintptr_t target_size = target_var->end - target_var->start;
1055 1.1.1.2 mrg
1056 1.1.1.2 mrg if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1057 1.1 mrg {
1058 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1059 1.1 mrg if (is_register_lock)
1060 1.1 mrg gomp_mutex_unlock (®ister_lock);
1061 1.1.1.2 mrg gomp_fatal ("Cannot map target variables (size mismatch)");
1062 1.1 mrg }
1063 1.1 mrg
1064 1.1 mrg splay_tree_key k = &array->key;
1065 1.1 mrg k->host_start = (uintptr_t) host_var_table[i * 2];
1066 1.1.1.2 mrg k->host_end
1067 1.1.1.2 mrg = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1068 1.1 mrg k->tgt = tgt;
1069 1.1 mrg k->tgt_offset = target_var->start;
1070 1.1.1.2 mrg k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1071 1.1.1.2 mrg k->link_key = NULL;
1072 1.1 mrg array->left = NULL;
1073 1.1 mrg array->right = NULL;
1074 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
1075 1.1 mrg array++;
1076 1.1 mrg }
1077 1.1 mrg
1078 1.1 mrg free (target_table);
1079 1.1 mrg }
1080 1.1 mrg
1081 1.1.1.2 mrg /* Unload the mappings described by target_data from device DEVICE_P.
1082 1.1.1.2 mrg The device must be locked. */
1083 1.1.1.2 mrg
1084 1.1.1.2 mrg static void
1085 1.1.1.2 mrg gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1086 1.1.1.2 mrg unsigned version,
1087 1.1.1.2 mrg const void *host_table, const void *target_data)
1088 1.1.1.2 mrg {
1089 1.1.1.2 mrg void **host_func_table = ((void ***) host_table)[0];
1090 1.1.1.2 mrg void **host_funcs_end = ((void ***) host_table)[1];
1091 1.1.1.2 mrg void **host_var_table = ((void ***) host_table)[2];
1092 1.1.1.2 mrg void **host_vars_end = ((void ***) host_table)[3];
1093 1.1.1.2 mrg
1094 1.1.1.2 mrg /* The func table contains only addresses, the var table contains addresses
1095 1.1.1.2 mrg and corresponding sizes. */
1096 1.1.1.2 mrg int num_funcs = host_funcs_end - host_func_table;
1097 1.1.1.2 mrg int num_vars = (host_vars_end - host_var_table) / 2;
1098 1.1.1.2 mrg
1099 1.1.1.2 mrg struct splay_tree_key_s k;
1100 1.1.1.2 mrg splay_tree_key node = NULL;
1101 1.1.1.2 mrg
1102 1.1.1.2 mrg /* Find mapping at start of node array */
1103 1.1.1.2 mrg if (num_funcs || num_vars)
1104 1.1.1.2 mrg {
1105 1.1.1.2 mrg k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1106 1.1.1.2 mrg : (uintptr_t) host_var_table[0]);
1107 1.1.1.2 mrg k.host_end = k.host_start + 1;
1108 1.1.1.2 mrg node = splay_tree_lookup (&devicep->mem_map, &k);
1109 1.1.1.2 mrg }
1110 1.1.1.2 mrg
1111 1.1.1.2.2.1 pgoyette if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1112 1.1.1.2.2.1 pgoyette {
1113 1.1.1.2.2.1 pgoyette gomp_mutex_unlock (&devicep->lock);
1114 1.1.1.2.2.1 pgoyette gomp_fatal ("image unload fail");
1115 1.1.1.2.2.1 pgoyette }
1116 1.1.1.2 mrg
1117 1.1.1.2 mrg /* Remove mappings from splay tree. */
1118 1.1.1.2 mrg int i;
1119 1.1.1.2 mrg for (i = 0; i < num_funcs; i++)
1120 1.1.1.2 mrg {
1121 1.1.1.2 mrg k.host_start = (uintptr_t) host_func_table[i];
1122 1.1.1.2 mrg k.host_end = k.host_start + 1;
1123 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k);
1124 1.1.1.2 mrg }
1125 1.1.1.2 mrg
1126 1.1.1.2 mrg /* Most significant bit of the size in host and target tables marks
1127 1.1.1.2 mrg "omp declare target link" variables. */
1128 1.1.1.2 mrg const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1129 1.1.1.2 mrg const uintptr_t size_mask = ~link_bit;
1130 1.1.1.2 mrg bool is_tgt_unmapped = false;
1131 1.1.1.2 mrg
1132 1.1.1.2 mrg for (i = 0; i < num_vars; i++)
1133 1.1.1.2 mrg {
1134 1.1.1.2 mrg k.host_start = (uintptr_t) host_var_table[i * 2];
1135 1.1.1.2 mrg k.host_end
1136 1.1.1.2 mrg = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1137 1.1.1.2 mrg
1138 1.1.1.2 mrg if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1139 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, &k);
1140 1.1.1.2 mrg else
1141 1.1.1.2 mrg {
1142 1.1.1.2 mrg splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1143 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, n);
1144 1.1.1.2 mrg if (n->link_key)
1145 1.1.1.2 mrg {
1146 1.1.1.2 mrg if (n->tgt->refcount > 1)
1147 1.1.1.2 mrg n->tgt->refcount--;
1148 1.1.1.2 mrg else
1149 1.1.1.2 mrg {
1150 1.1.1.2 mrg is_tgt_unmapped = true;
1151 1.1.1.2 mrg gomp_unmap_tgt (n->tgt);
1152 1.1.1.2 mrg }
1153 1.1.1.2 mrg }
1154 1.1.1.2 mrg }
1155 1.1.1.2 mrg }
1156 1.1.1.2 mrg
1157 1.1.1.2 mrg if (node && !is_tgt_unmapped)
1158 1.1.1.2 mrg {
1159 1.1.1.2 mrg free (node->tgt);
1160 1.1.1.2 mrg free (node);
1161 1.1.1.2 mrg }
1162 1.1.1.2 mrg }
1163 1.1.1.2 mrg
1164 1.1 mrg /* This function should be called from every offload image while loading.
1165 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1166 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
1167 1.1 mrg
1168 1.1 mrg void
1169 1.1.1.2 mrg GOMP_offload_register_ver (unsigned version, const void *host_table,
1170 1.1.1.2 mrg int target_type, const void *target_data)
1171 1.1 mrg {
1172 1.1 mrg int i;
1173 1.1.1.2 mrg
1174 1.1.1.2 mrg if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1175 1.1.1.2 mrg gomp_fatal ("Library too old for offload (version %u < %u)",
1176 1.1.1.2 mrg GOMP_VERSION, GOMP_VERSION_LIB (version));
1177 1.1.1.2 mrg
1178 1.1 mrg gomp_mutex_lock (®ister_lock);
1179 1.1 mrg
1180 1.1 mrg /* Load image to all initialized devices. */
1181 1.1 mrg for (i = 0; i < num_devices; i++)
1182 1.1 mrg {
1183 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
1184 1.1 mrg gomp_mutex_lock (&devicep->lock);
1185 1.1.1.2 mrg if (devicep->type == target_type
1186 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
1187 1.1.1.2 mrg gomp_load_image_to_device (devicep, version,
1188 1.1.1.2 mrg host_table, target_data, true);
1189 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1190 1.1 mrg }
1191 1.1 mrg
1192 1.1 mrg /* Insert image to array of pending images. */
1193 1.1 mrg offload_images
1194 1.1 mrg = gomp_realloc_unlock (offload_images,
1195 1.1 mrg (num_offload_images + 1)
1196 1.1 mrg * sizeof (struct offload_image_descr));
1197 1.1.1.2 mrg offload_images[num_offload_images].version = version;
1198 1.1 mrg offload_images[num_offload_images].type = target_type;
1199 1.1 mrg offload_images[num_offload_images].host_table = host_table;
1200 1.1 mrg offload_images[num_offload_images].target_data = target_data;
1201 1.1 mrg
1202 1.1 mrg num_offload_images++;
1203 1.1 mrg gomp_mutex_unlock (®ister_lock);
1204 1.1 mrg }
1205 1.1 mrg
1206 1.1.1.2 mrg void
1207 1.1.1.2 mrg GOMP_offload_register (const void *host_table, int target_type,
1208 1.1.1.2 mrg const void *target_data)
1209 1.1.1.2 mrg {
1210 1.1.1.2 mrg GOMP_offload_register_ver (0, host_table, target_type, target_data);
1211 1.1.1.2 mrg }
1212 1.1.1.2 mrg
1213 1.1 mrg /* This function should be called from every offload image while unloading.
1214 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1215 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
1216 1.1 mrg
1217 1.1 mrg void
1218 1.1.1.2 mrg GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1219 1.1.1.2 mrg int target_type, const void *target_data)
1220 1.1 mrg {
1221 1.1 mrg int i;
1222 1.1 mrg
1223 1.1 mrg gomp_mutex_lock (®ister_lock);
1224 1.1 mrg
1225 1.1 mrg /* Unload image from all initialized devices. */
1226 1.1 mrg for (i = 0; i < num_devices; i++)
1227 1.1 mrg {
1228 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
1229 1.1 mrg gomp_mutex_lock (&devicep->lock);
1230 1.1.1.2 mrg if (devicep->type == target_type
1231 1.1.1.2 mrg && devicep->state == GOMP_DEVICE_INITIALIZED)
1232 1.1.1.2 mrg gomp_unload_image_from_device (devicep, version,
1233 1.1.1.2 mrg host_table, target_data);
1234 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1235 1.1 mrg }
1236 1.1 mrg
1237 1.1 mrg /* Remove image from array of pending images. */
1238 1.1 mrg for (i = 0; i < num_offload_images; i++)
1239 1.1 mrg if (offload_images[i].target_data == target_data)
1240 1.1 mrg {
1241 1.1 mrg offload_images[i] = offload_images[--num_offload_images];
1242 1.1 mrg break;
1243 1.1 mrg }
1244 1.1 mrg
1245 1.1 mrg gomp_mutex_unlock (®ister_lock);
1246 1.1 mrg }
1247 1.1 mrg
1248 1.1.1.2 mrg void
1249 1.1.1.2 mrg GOMP_offload_unregister (const void *host_table, int target_type,
1250 1.1.1.2 mrg const void *target_data)
1251 1.1.1.2 mrg {
1252 1.1.1.2 mrg GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1253 1.1.1.2 mrg }
1254 1.1.1.2 mrg
1255 1.1 mrg /* This function initializes the target device, specified by DEVICEP. DEVICEP
1256 1.1 mrg must be locked on entry, and remains locked on return. */
1257 1.1 mrg
1258 1.1 mrg attribute_hidden void
1259 1.1 mrg gomp_init_device (struct gomp_device_descr *devicep)
1260 1.1 mrg {
1261 1.1 mrg int i;
1262 1.1.1.2.2.1 pgoyette if (!devicep->init_device_func (devicep->target_id))
1263 1.1.1.2.2.1 pgoyette {
1264 1.1.1.2.2.1 pgoyette gomp_mutex_unlock (&devicep->lock);
1265 1.1.1.2.2.1 pgoyette gomp_fatal ("device initialization failed");
1266 1.1.1.2.2.1 pgoyette }
1267 1.1 mrg
1268 1.1 mrg /* Load to device all images registered by the moment. */
1269 1.1 mrg for (i = 0; i < num_offload_images; i++)
1270 1.1 mrg {
1271 1.1 mrg struct offload_image_descr *image = &offload_images[i];
1272 1.1 mrg if (image->type == devicep->type)
1273 1.1.1.2 mrg gomp_load_image_to_device (devicep, image->version,
1274 1.1.1.2 mrg image->host_table, image->target_data,
1275 1.1.1.2 mrg false);
1276 1.1 mrg }
1277 1.1 mrg
1278 1.1.1.2 mrg devicep->state = GOMP_DEVICE_INITIALIZED;
1279 1.1.1.2 mrg }
1280 1.1.1.2 mrg
1281 1.1.1.2 mrg attribute_hidden void
1282 1.1.1.2 mrg gomp_unload_device (struct gomp_device_descr *devicep)
1283 1.1.1.2 mrg {
1284 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
1285 1.1.1.2 mrg {
1286 1.1.1.2 mrg unsigned i;
1287 1.1.1.2 mrg
1288 1.1.1.2 mrg /* Unload from device all images registered at the moment. */
1289 1.1.1.2 mrg for (i = 0; i < num_offload_images; i++)
1290 1.1.1.2 mrg {
1291 1.1.1.2 mrg struct offload_image_descr *image = &offload_images[i];
1292 1.1.1.2 mrg if (image->type == devicep->type)
1293 1.1.1.2 mrg gomp_unload_image_from_device (devicep, image->version,
1294 1.1.1.2 mrg image->host_table,
1295 1.1.1.2 mrg image->target_data);
1296 1.1.1.2 mrg }
1297 1.1.1.2 mrg }
1298 1.1 mrg }
1299 1.1 mrg
1300 1.1 mrg /* Free address mapping tables. MM must be locked on entry, and remains locked
1301 1.1 mrg on return. */
1302 1.1 mrg
1303 1.1 mrg attribute_hidden void
1304 1.1 mrg gomp_free_memmap (struct splay_tree_s *mem_map)
1305 1.1 mrg {
1306 1.1 mrg while (mem_map->root)
1307 1.1 mrg {
1308 1.1 mrg struct target_mem_desc *tgt = mem_map->root->key.tgt;
1309 1.1 mrg
1310 1.1 mrg splay_tree_remove (mem_map, &mem_map->root->key);
1311 1.1 mrg free (tgt->array);
1312 1.1 mrg free (tgt);
1313 1.1 mrg }
1314 1.1 mrg }
1315 1.1 mrg
1316 1.1.1.2 mrg /* Host fallback for GOMP_target{,_ext} routines. */
1317 1.1 mrg
1318 1.1.1.2 mrg static void
1319 1.1.1.2 mrg gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1320 1.1 mrg {
1321 1.1.1.2 mrg struct gomp_thread old_thr, *thr = gomp_thread ();
1322 1.1.1.2 mrg old_thr = *thr;
1323 1.1.1.2 mrg memset (thr, '\0', sizeof (*thr));
1324 1.1.1.2 mrg if (gomp_places_list)
1325 1.1.1.2 mrg {
1326 1.1.1.2 mrg thr->place = old_thr.place;
1327 1.1.1.2 mrg thr->ts.place_partition_len = gomp_places_list_len;
1328 1.1.1.2 mrg }
1329 1.1.1.2 mrg fn (hostaddrs);
1330 1.1.1.2 mrg gomp_free_thread (thr);
1331 1.1.1.2 mrg *thr = old_thr;
1332 1.1 mrg }
1333 1.1 mrg
1334 1.1.1.2 mrg /* Calculate alignment and size requirements of a private copy of data shared
1335 1.1.1.2 mrg as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1336 1.1.1.2 mrg
1337 1.1.1.2 mrg static inline void
1338 1.1.1.2 mrg calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1339 1.1.1.2 mrg unsigned short *kinds, size_t *tgt_align,
1340 1.1.1.2 mrg size_t *tgt_size)
1341 1.1.1.2 mrg {
1342 1.1.1.2 mrg size_t i;
1343 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
1344 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1345 1.1.1.2 mrg {
1346 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
1347 1.1.1.2 mrg if (*tgt_align < align)
1348 1.1.1.2 mrg *tgt_align = align;
1349 1.1.1.2 mrg *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1350 1.1.1.2 mrg *tgt_size += sizes[i];
1351 1.1.1.2 mrg }
1352 1.1.1.2 mrg }
1353 1.1.1.2 mrg
1354 1.1.1.2 mrg /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1355 1.1.1.2 mrg
1356 1.1.1.2 mrg static inline void
1357 1.1.1.2 mrg copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1358 1.1.1.2 mrg size_t *sizes, unsigned short *kinds, size_t tgt_align,
1359 1.1.1.2 mrg size_t tgt_size)
1360 1.1.1.2 mrg {
1361 1.1.1.2 mrg uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1362 1.1.1.2 mrg if (al)
1363 1.1.1.2 mrg tgt += tgt_align - al;
1364 1.1.1.2 mrg tgt_size = 0;
1365 1.1.1.2 mrg size_t i;
1366 1.1.1.2 mrg for (i = 0; i < mapnum; i++)
1367 1.1.1.2 mrg if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1368 1.1.1.2 mrg {
1369 1.1.1.2 mrg size_t align = (size_t) 1 << (kinds[i] >> 8);
1370 1.1.1.2 mrg tgt_size = (tgt_size + align - 1) & ~(align - 1);
1371 1.1.1.2 mrg memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1372 1.1.1.2 mrg hostaddrs[i] = tgt + tgt_size;
1373 1.1.1.2 mrg tgt_size = tgt_size + sizes[i];
1374 1.1.1.2 mrg }
1375 1.1.1.2 mrg }
1376 1.1.1.2 mrg
1377 1.1.1.2 mrg /* Helper function of GOMP_target{,_ext} routines. */
1378 1.1.1.2 mrg
1379 1.1.1.2 mrg static void *
1380 1.1.1.2 mrg gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1381 1.1.1.2 mrg void (*host_fn) (void *))
1382 1.1.1.2 mrg {
1383 1.1.1.2 mrg if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1384 1.1.1.2 mrg return (void *) host_fn;
1385 1.1.1.2 mrg else
1386 1.1.1.2 mrg {
1387 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
1388 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_FINALIZED)
1389 1.1.1.2 mrg {
1390 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1391 1.1.1.2 mrg return NULL;
1392 1.1.1.2 mrg }
1393 1.1.1.2 mrg
1394 1.1.1.2 mrg struct splay_tree_key_s k;
1395 1.1.1.2 mrg k.host_start = (uintptr_t) host_fn;
1396 1.1.1.2 mrg k.host_end = k.host_start + 1;
1397 1.1.1.2 mrg splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1398 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1399 1.1.1.2 mrg if (tgt_fn == NULL)
1400 1.1.1.2 mrg return NULL;
1401 1.1.1.2 mrg
1402 1.1.1.2 mrg return (void *) tgt_fn->tgt_offset;
1403 1.1.1.2 mrg }
1404 1.1.1.2 mrg }
1405 1.1.1.2 mrg
1406 1.1.1.2 mrg /* Called when encountering a target directive. If DEVICE
1407 1.1 mrg is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1408 1.1 mrg GOMP_DEVICE_HOST_FALLBACK (or any value
1409 1.1 mrg larger than last available hw device), use host fallback.
1410 1.1 mrg FN is address of host code, UNUSED is part of the current ABI, but
1411 1.1 mrg we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1412 1.1 mrg with MAPNUM entries, with addresses of the host objects,
1413 1.1 mrg sizes of the host objects (resp. for pointer kind pointer bias
1414 1.1 mrg and assumed sizeof (void *) size) and kinds. */
1415 1.1 mrg
1416 1.1 mrg void
1417 1.1 mrg GOMP_target (int device, void (*fn) (void *), const void *unused,
1418 1.1 mrg size_t mapnum, void **hostaddrs, size_t *sizes,
1419 1.1 mrg unsigned char *kinds)
1420 1.1 mrg {
1421 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1422 1.1 mrg
1423 1.1.1.2 mrg void *fn_addr;
1424 1.1 mrg if (devicep == NULL
1425 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1426 1.1.1.2 mrg /* All shared memory devices should use the GOMP_target_ext function. */
1427 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1428 1.1.1.2 mrg || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1429 1.1.1.2 mrg return gomp_target_fallback (fn, hostaddrs);
1430 1.1.1.2 mrg
1431 1.1.1.2 mrg struct target_mem_desc *tgt_vars
1432 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1433 1.1.1.2 mrg GOMP_MAP_VARS_TARGET);
1434 1.1.1.2 mrg devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1435 1.1.1.2 mrg NULL);
1436 1.1.1.2 mrg gomp_unmap_vars (tgt_vars, true);
1437 1.1.1.2 mrg }
1438 1.1.1.2 mrg
1439 1.1.1.2 mrg /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1440 1.1.1.2 mrg and several arguments have been added:
1441 1.1.1.2 mrg FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1442 1.1.1.2 mrg DEPEND is array of dependencies, see GOMP_task for details.
1443 1.1.1.2 mrg
1444 1.1.1.2 mrg ARGS is a pointer to an array consisting of a variable number of both
1445 1.1.1.2 mrg device-independent and device-specific arguments, which can take one two
1446 1.1.1.2 mrg elements where the first specifies for which device it is intended, the type
1447 1.1.1.2 mrg and optionally also the value. If the value is not present in the first
1448 1.1.1.2 mrg one, the whole second element the actual value. The last element of the
1449 1.1.1.2 mrg array is a single NULL. Among the device independent can be for example
1450 1.1.1.2 mrg NUM_TEAMS and THREAD_LIMIT.
1451 1.1.1.2 mrg
1452 1.1.1.2 mrg NUM_TEAMS is positive if GOMP_teams will be called in the body with
1453 1.1.1.2 mrg that value, or 1 if teams construct is not present, or 0, if
1454 1.1.1.2 mrg teams construct does not have num_teams clause and so the choice is
1455 1.1.1.2 mrg implementation defined, and -1 if it can't be determined on the host
1456 1.1.1.2 mrg what value will GOMP_teams have on the device.
1457 1.1.1.2 mrg THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1458 1.1.1.2 mrg body with that value, or 0, if teams construct does not have thread_limit
1459 1.1.1.2 mrg clause or the teams construct is not present, or -1 if it can't be
1460 1.1.1.2 mrg determined on the host what value will GOMP_teams have on the device. */
1461 1.1.1.2 mrg
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once firstprivate data has been copied, so the later fallback
     and shared-memory paths do not copy it a second time.  */
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      /* Tear down the previous (team-less) task after its ICVs
		 have been captured above, then adopt the new implicit
		 task of the single-thread team.  */
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      /* In a final task the nowait clause is ignored; otherwise defer
	 the whole region as an asynchronous target task.  */
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  /* Host fallback: no device, device lacks OpenMP 4.0 offloading, the
     function has no device version, or the device declines to run it.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory device: no mapping needed, but firstprivate data
	 still has to be laid out if that was not done above.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  /* With no mapping descriptor, pass the host addresses directly.  */
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
1582 1.1 mrg
1583 1.1.1.2 mrg /* Host fallback for GOMP_target_data{,_ext} routines. */
1584 1.1.1.2 mrg
1585 1.1.1.2 mrg static void
1586 1.1.1.2 mrg gomp_target_data_fallback (void)
1587 1.1.1.2 mrg {
1588 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (false);
1589 1.1.1.2 mrg if (icv->target_data)
1590 1.1 mrg {
1591 1.1.1.2 mrg /* Even when doing a host fallback, if there are any active
1592 1.1.1.2 mrg #pragma omp target data constructs, need to remember the
1593 1.1.1.2 mrg new #pragma omp target data, otherwise GOMP_target_end_data
1594 1.1.1.2 mrg would get out of sync. */
1595 1.1.1.2 mrg struct target_mem_desc *tgt
1596 1.1.1.2 mrg = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1597 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
1598 1.1.1.2 mrg tgt->prev = icv->target_data;
1599 1.1.1.2 mrg icv->target_data = tgt;
1600 1.1 mrg }
1601 1.1 mrg }
1602 1.1 mrg
1603 1.1 mrg void
1604 1.1 mrg GOMP_target_data (int device, const void *unused, size_t mapnum,
1605 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
1606 1.1 mrg {
1607 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1608 1.1 mrg
1609 1.1 mrg if (devicep == NULL
1610 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1611 1.1.1.2 mrg || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1612 1.1.1.2 mrg return gomp_target_data_fallback ();
1613 1.1 mrg
1614 1.1 mrg struct target_mem_desc *tgt
1615 1.1 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1616 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
1617 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (true);
1618 1.1.1.2 mrg tgt->prev = icv->target_data;
1619 1.1.1.2 mrg icv->target_data = tgt;
1620 1.1.1.2 mrg }
1621 1.1.1.2 mrg
1622 1.1.1.2 mrg void
1623 1.1.1.2 mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1624 1.1.1.2 mrg size_t *sizes, unsigned short *kinds)
1625 1.1.1.2 mrg {
1626 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device);
1627 1.1.1.2 mrg
1628 1.1.1.2 mrg if (devicep == NULL
1629 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1630 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1631 1.1.1.2 mrg return gomp_target_data_fallback ();
1632 1.1.1.2 mrg
1633 1.1.1.2 mrg struct target_mem_desc *tgt
1634 1.1.1.2 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1635 1.1.1.2 mrg GOMP_MAP_VARS_DATA);
1636 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
1637 1.1 mrg tgt->prev = icv->target_data;
1638 1.1 mrg icv->target_data = tgt;
1639 1.1 mrg }
1640 1.1 mrg
1641 1.1 mrg void
1642 1.1 mrg GOMP_target_end_data (void)
1643 1.1 mrg {
1644 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
1645 1.1 mrg if (icv->target_data)
1646 1.1 mrg {
1647 1.1 mrg struct target_mem_desc *tgt = icv->target_data;
1648 1.1 mrg icv->target_data = tgt->prev;
1649 1.1 mrg gomp_unmap_vars (tgt, true);
1650 1.1 mrg }
1651 1.1 mrg }
1652 1.1 mrg
1653 1.1 mrg void
1654 1.1 mrg GOMP_target_update (int device, const void *unused, size_t mapnum,
1655 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
1656 1.1 mrg {
1657 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1658 1.1 mrg
1659 1.1 mrg if (devicep == NULL
1660 1.1.1.2 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1661 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1662 1.1.1.2 mrg return;
1663 1.1.1.2 mrg
1664 1.1.1.2 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1665 1.1.1.2 mrg }
1666 1.1.1.2 mrg
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* nowait + depend inside a team: try to defer the update as
	     a target task (fn == NULL marks a data-only task).  */
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to update without a real separate-memory device.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
1726 1.1.1.2 mrg
/* Handle an "omp target exit data" style unmapping: decrement reference
   counts for the listed mappings, copy back "from" data when a mapping
   expires (or always for ALWAYS_FROM), and remove expired mappings from
   the device's splay tree.  Takes and releases DEVICEP->lock.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  /* Low byte of each kind selects the map operation.  */
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length sections need the special lookup that also
	     matches a zero-sized range at the start of a mapping.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  /* REFCOUNT_INFINITY marks mappings that are never removed.  */
	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* "delete" forces the mapping out regardless of refcount.  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* Copy device data back before the mapping disappears.  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      /* Restore a "declare target link" key that this mapping
		 had displaced.  */
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
1797 1.1 mrg
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* nowait + depend inside a team: defer as a data-only target
	     task (fn == NULL) when possible.  */
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to do without a real separate-memory device.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  /* Enter data: map each item individually.  A GOMP_MAP_STRUCT entry
     heads a group of sizes[i] following entries, mapped together and
     then skipped by the loop.  Exit data: hand the whole list over to
     gomp_exit_data.  */
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
1870 1.1.1.2 mrg
/* Execute (part of) a deferred target task.  Returns true when an
   asynchronous device launch is in flight and the task must stay
   queued; returns false when the work is complete.  With a non-NULL
   ttask->fn this runs a target region; otherwise it performs the
   data-only update/enter/exit operations.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      /* Same fallback conditions as GOMP_target_ext: no device, no
	 offloaded copy of the function, or device refuses to run it.  */
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      /* Second invocation, after the async launch completed: just
	 tear down the mappings.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* Fire-and-forget launch; the plugin signals completion through
	 the ttask pointer.  */
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  /* Data-only task: nothing to do without a separate-memory device.  */
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data; GOMP_MAP_STRUCT heads a group of sizes[i] entries,
       mapped together (see GOMP_target_enter_exit_data).  */
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
1942 1.1 mrg
1943 1.1 mrg void
1944 1.1 mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1945 1.1 mrg {
1946 1.1 mrg if (thread_limit)
1947 1.1 mrg {
1948 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
1949 1.1 mrg icv->thread_limit_var
1950 1.1 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1951 1.1 mrg }
1952 1.1 mrg (void) num_teams;
1953 1.1 mrg }
1954 1.1 mrg
1955 1.1.1.2 mrg void *
1956 1.1.1.2 mrg omp_target_alloc (size_t size, int device_num)
1957 1.1.1.2 mrg {
1958 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1959 1.1.1.2 mrg return malloc (size);
1960 1.1.1.2 mrg
1961 1.1.1.2 mrg if (device_num < 0)
1962 1.1.1.2 mrg return NULL;
1963 1.1.1.2 mrg
1964 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
1965 1.1.1.2 mrg if (devicep == NULL)
1966 1.1.1.2 mrg return NULL;
1967 1.1.1.2 mrg
1968 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1969 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1970 1.1.1.2 mrg return malloc (size);
1971 1.1.1.2 mrg
1972 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
1973 1.1.1.2 mrg void *ret = devicep->alloc_func (devicep->target_id, size);
1974 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
1975 1.1.1.2 mrg return ret;
1976 1.1.1.2 mrg }
1977 1.1.1.2 mrg
1978 1.1.1.2 mrg void
1979 1.1.1.2 mrg omp_target_free (void *device_ptr, int device_num)
1980 1.1.1.2 mrg {
1981 1.1.1.2 mrg if (device_ptr == NULL)
1982 1.1.1.2 mrg return;
1983 1.1.1.2 mrg
1984 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1985 1.1.1.2 mrg {
1986 1.1.1.2 mrg free (device_ptr);
1987 1.1.1.2 mrg return;
1988 1.1.1.2 mrg }
1989 1.1.1.2 mrg
1990 1.1.1.2 mrg if (device_num < 0)
1991 1.1.1.2 mrg return;
1992 1.1.1.2 mrg
1993 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
1994 1.1.1.2 mrg if (devicep == NULL)
1995 1.1.1.2 mrg return;
1996 1.1.1.2 mrg
1997 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1998 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1999 1.1.1.2 mrg {
2000 1.1.1.2 mrg free (device_ptr);
2001 1.1.1.2 mrg return;
2002 1.1.1.2 mrg }
2003 1.1.1.2 mrg
2004 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2005 1.1.1.2.2.1 pgoyette gomp_free_device_memory (devicep, device_ptr);
2006 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2007 1.1.1.2 mrg }
2008 1.1.1.2 mrg
2009 1.1.1.2 mrg int
2010 1.1.1.2 mrg omp_target_is_present (void *ptr, int device_num)
2011 1.1.1.2 mrg {
2012 1.1.1.2 mrg if (ptr == NULL)
2013 1.1.1.2 mrg return 1;
2014 1.1.1.2 mrg
2015 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2016 1.1.1.2 mrg return 1;
2017 1.1.1.2 mrg
2018 1.1.1.2 mrg if (device_num < 0)
2019 1.1.1.2 mrg return 0;
2020 1.1.1.2 mrg
2021 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2022 1.1.1.2 mrg if (devicep == NULL)
2023 1.1.1.2 mrg return 0;
2024 1.1.1.2 mrg
2025 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2026 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2027 1.1.1.2 mrg return 1;
2028 1.1.1.2 mrg
2029 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2030 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
2031 1.1.1.2 mrg struct splay_tree_key_s cur_node;
2032 1.1.1.2 mrg
2033 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
2034 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
2035 1.1.1.2 mrg splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2036 1.1.1.2 mrg int ret = n != NULL;
2037 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2038 1.1.1.2 mrg return ret;
2039 1.1.1.2 mrg }
2040 1.1.1.2 mrg
/* OpenMP omp_target_memcpy: copy LENGTH bytes from SRC+SRC_OFFSET on
   SRC_DEVICE_NUM to DST+DST_OFFSET on DST_DEVICE_NUM.  Returns 0 on
   success, EINVAL on bad device numbers or failed transfers.  A NULL
   devicep below stands for "host side".  */

int
omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory / non-offloading devices are treated as host.  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      /* Shared-memory / non-offloading devices are treated as host.  */
      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  /* host -> host.  */
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  /* host -> device.  */
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* device -> host.  */
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* device -> same device.  */
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Copies between two distinct devices are not supported.  */
  return EINVAL;
}
2108 1.1.1.2 mrg
/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume, peeling one dimension per recursion level until
   the contiguous innermost dimension is reached.  Returns 0 on success,
   EINVAL on overflow or transfer failure.  The caller holds the relevant
   device lock.  */

static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  /* Base case: a single contiguous run of volume[0] elements.  */
  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (char *) src + src_off, length);
      else
	/* Two distinct devices: unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Size in bytes of one slice of the remaining (inner) dimensions,
     with overflow checks on every multiplication.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over the outermost dimension, one slice at a time.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
2180 1.1.1.2 mrg
/* OpenMP omp_target_memcpy_rect: copy a NUM_DIMS-dimensional rectangular
   subvolume between host/device memory.  Returns 0 on success, EINVAL on
   error.  Per the OpenMP spec, NULL dst and src query the maximum number
   of dimensions supported (unbounded here, hence INT_MAX).  */

int
omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory / non-offloading devices are treated as host.  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      /* Shared-memory / non-offloading devices are treated as host.  */
      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  /* Copies between two distinct devices are not supported.  */
  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  /* At most one side is a device; lock that device around the copy.  */
  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
2239 1.1.1.2 mrg
2240 1.1.1.2 mrg int
2241 1.1.1.2 mrg omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2242 1.1.1.2 mrg size_t device_offset, int device_num)
2243 1.1.1.2 mrg {
2244 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2245 1.1.1.2 mrg return EINVAL;
2246 1.1.1.2 mrg
2247 1.1.1.2 mrg if (device_num < 0)
2248 1.1.1.2 mrg return EINVAL;
2249 1.1.1.2 mrg
2250 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2251 1.1.1.2 mrg if (devicep == NULL)
2252 1.1.1.2 mrg return EINVAL;
2253 1.1.1.2 mrg
2254 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2255 1.1.1.2 mrg || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2256 1.1.1.2 mrg return EINVAL;
2257 1.1.1.2 mrg
2258 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2259 1.1.1.2 mrg
2260 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
2261 1.1.1.2 mrg struct splay_tree_key_s cur_node;
2262 1.1.1.2 mrg int ret = EINVAL;
2263 1.1.1.2 mrg
2264 1.1.1.2 mrg cur_node.host_start = (uintptr_t) host_ptr;
2265 1.1.1.2 mrg cur_node.host_end = cur_node.host_start + size;
2266 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2267 1.1.1.2 mrg if (n)
2268 1.1.1.2 mrg {
2269 1.1.1.2 mrg if (n->tgt->tgt_start + n->tgt_offset
2270 1.1.1.2 mrg == (uintptr_t) device_ptr + device_offset
2271 1.1.1.2 mrg && n->host_start <= cur_node.host_start
2272 1.1.1.2 mrg && n->host_end >= cur_node.host_end)
2273 1.1.1.2 mrg ret = 0;
2274 1.1.1.2 mrg }
2275 1.1.1.2 mrg else
2276 1.1.1.2 mrg {
2277 1.1.1.2 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2278 1.1.1.2 mrg tgt->array = gomp_malloc (sizeof (*tgt->array));
2279 1.1.1.2 mrg tgt->refcount = 1;
2280 1.1.1.2 mrg tgt->tgt_start = 0;
2281 1.1.1.2 mrg tgt->tgt_end = 0;
2282 1.1.1.2 mrg tgt->to_free = NULL;
2283 1.1.1.2 mrg tgt->prev = NULL;
2284 1.1.1.2 mrg tgt->list_count = 0;
2285 1.1.1.2 mrg tgt->device_descr = devicep;
2286 1.1.1.2 mrg splay_tree_node array = tgt->array;
2287 1.1.1.2 mrg splay_tree_key k = &array->key;
2288 1.1.1.2 mrg k->host_start = cur_node.host_start;
2289 1.1.1.2 mrg k->host_end = cur_node.host_end;
2290 1.1.1.2 mrg k->tgt = tgt;
2291 1.1.1.2 mrg k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2292 1.1.1.2 mrg k->refcount = REFCOUNT_INFINITY;
2293 1.1.1.2 mrg array->left = NULL;
2294 1.1.1.2 mrg array->right = NULL;
2295 1.1.1.2 mrg splay_tree_insert (&devicep->mem_map, array);
2296 1.1.1.2 mrg ret = 0;
2297 1.1.1.2 mrg }
2298 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2299 1.1.1.2 mrg return ret;
2300 1.1.1.2 mrg }
2301 1.1.1.2 mrg
2302 1.1.1.2 mrg int
2303 1.1.1.2 mrg omp_target_disassociate_ptr (void *ptr, int device_num)
2304 1.1.1.2 mrg {
2305 1.1.1.2 mrg if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2306 1.1.1.2 mrg return EINVAL;
2307 1.1.1.2 mrg
2308 1.1.1.2 mrg if (device_num < 0)
2309 1.1.1.2 mrg return EINVAL;
2310 1.1.1.2 mrg
2311 1.1.1.2 mrg struct gomp_device_descr *devicep = resolve_device (device_num);
2312 1.1.1.2 mrg if (devicep == NULL)
2313 1.1.1.2 mrg return EINVAL;
2314 1.1.1.2 mrg
2315 1.1.1.2 mrg if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2316 1.1.1.2 mrg return EINVAL;
2317 1.1.1.2 mrg
2318 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2319 1.1.1.2 mrg
2320 1.1.1.2 mrg struct splay_tree_s *mem_map = &devicep->mem_map;
2321 1.1.1.2 mrg struct splay_tree_key_s cur_node;
2322 1.1.1.2 mrg int ret = EINVAL;
2323 1.1.1.2 mrg
2324 1.1.1.2 mrg cur_node.host_start = (uintptr_t) ptr;
2325 1.1.1.2 mrg cur_node.host_end = cur_node.host_start;
2326 1.1.1.2 mrg splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2327 1.1.1.2 mrg if (n
2328 1.1.1.2 mrg && n->host_start == cur_node.host_start
2329 1.1.1.2 mrg && n->refcount == REFCOUNT_INFINITY
2330 1.1.1.2 mrg && n->tgt->tgt_start == 0
2331 1.1.1.2 mrg && n->tgt->to_free == NULL
2332 1.1.1.2 mrg && n->tgt->refcount == 1
2333 1.1.1.2 mrg && n->tgt->list_count == 0)
2334 1.1.1.2 mrg {
2335 1.1.1.2 mrg splay_tree_remove (&devicep->mem_map, n);
2336 1.1.1.2 mrg gomp_unmap_tgt (n->tgt);
2337 1.1.1.2 mrg ret = 0;
2338 1.1.1.2 mrg }
2339 1.1.1.2 mrg
2340 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2341 1.1.1.2 mrg return ret;
2342 1.1.1.2 mrg }
2343 1.1.1.2 mrg
2344 1.1 mrg #ifdef PLUGIN_SUPPORT
2345 1.1 mrg
2346 1.1 mrg /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2347 1.1 mrg in PLUGIN_NAME.
2348 1.1 mrg The handles of the found functions are stored in the corresponding fields
2349 1.1 mrg of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2350 1.1 mrg
2351 1.1 mrg static bool
2352 1.1 mrg gomp_load_plugin_for_device (struct gomp_device_descr *device,
2353 1.1 mrg const char *plugin_name)
2354 1.1 mrg {
2355 1.1 mrg const char *err = NULL, *last_missing = NULL;
2356 1.1 mrg
2357 1.1 mrg void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2358 1.1 mrg if (!plugin_handle)
2359 1.1.1.2 mrg goto dl_fail;
2360 1.1 mrg
2361 1.1 mrg /* Check if all required functions are available in the plugin and store
2362 1.1.1.2 mrg their handlers. None of the symbols can legitimately be NULL,
2363 1.1.1.2 mrg so we don't need to check dlerror all the time. */
2364 1.1 mrg #define DLSYM(f) \
2365 1.1.1.2 mrg if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2366 1.1.1.2 mrg goto dl_fail
2367 1.1.1.2 mrg /* Similar, but missing functions are not an error. Return false if
2368 1.1.1.2 mrg failed, true otherwise. */
2369 1.1.1.2 mrg #define DLSYM_OPT(f, n) \
2370 1.1.1.2 mrg ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2371 1.1.1.2 mrg || (last_missing = #n, 0))
2372 1.1.1.2 mrg
2373 1.1.1.2 mrg DLSYM (version);
2374 1.1.1.2 mrg if (device->version_func () != GOMP_VERSION)
2375 1.1.1.2 mrg {
2376 1.1.1.2 mrg err = "plugin version mismatch";
2377 1.1.1.2 mrg goto fail;
2378 1.1.1.2 mrg }
2379 1.1 mrg
2380 1.1 mrg DLSYM (get_name);
2381 1.1 mrg DLSYM (get_caps);
2382 1.1 mrg DLSYM (get_type);
2383 1.1 mrg DLSYM (get_num_devices);
2384 1.1 mrg DLSYM (init_device);
2385 1.1 mrg DLSYM (fini_device);
2386 1.1 mrg DLSYM (load_image);
2387 1.1 mrg DLSYM (unload_image);
2388 1.1 mrg DLSYM (alloc);
2389 1.1 mrg DLSYM (free);
2390 1.1 mrg DLSYM (dev2host);
2391 1.1 mrg DLSYM (host2dev);
2392 1.1 mrg device->capabilities = device->get_caps_func ();
2393 1.1 mrg if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2394 1.1.1.2 mrg {
2395 1.1.1.2 mrg DLSYM (run);
2396 1.1.1.2 mrg DLSYM (async_run);
2397 1.1.1.2 mrg DLSYM_OPT (can_run, can_run);
2398 1.1.1.2 mrg DLSYM (dev2dev);
2399 1.1.1.2 mrg }
2400 1.1 mrg if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2401 1.1 mrg {
2402 1.1.1.2.2.1 pgoyette if (!DLSYM_OPT (openacc.exec, openacc_exec)
2403 1.1.1.2 mrg || !DLSYM_OPT (openacc.register_async_cleanup,
2404 1.1.1.2 mrg openacc_register_async_cleanup)
2405 1.1.1.2 mrg || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2406 1.1.1.2 mrg || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2407 1.1.1.2 mrg || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2408 1.1.1.2 mrg || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2409 1.1.1.2 mrg || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2410 1.1.1.2 mrg || !DLSYM_OPT (openacc.async_wait_all_async,
2411 1.1.1.2 mrg openacc_async_wait_all_async)
2412 1.1.1.2 mrg || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2413 1.1.1.2 mrg || !DLSYM_OPT (openacc.create_thread_data,
2414 1.1.1.2 mrg openacc_create_thread_data)
2415 1.1.1.2 mrg || !DLSYM_OPT (openacc.destroy_thread_data,
2416 1.1.1.2 mrg openacc_destroy_thread_data))
2417 1.1 mrg {
2418 1.1.1.2 mrg /* Require all the OpenACC handlers if we have
2419 1.1.1.2 mrg GOMP_OFFLOAD_CAP_OPENACC_200. */
2420 1.1 mrg err = "plugin missing OpenACC handler function";
2421 1.1.1.2 mrg goto fail;
2422 1.1 mrg }
2423 1.1.1.2 mrg
2424 1.1.1.2 mrg unsigned cuda = 0;
2425 1.1.1.2 mrg cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2426 1.1.1.2.2.1 pgoyette openacc_cuda_get_current_device);
2427 1.1.1.2 mrg cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2428 1.1.1.2.2.1 pgoyette openacc_cuda_get_current_context);
2429 1.1.1.2.2.1 pgoyette cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2430 1.1.1.2.2.1 pgoyette cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2431 1.1.1.2 mrg if (cuda && cuda != 4)
2432 1.1 mrg {
2433 1.1.1.2 mrg /* Make sure all the CUDA functions are there if any of them are. */
2434 1.1 mrg err = "plugin missing OpenACC CUDA handler function";
2435 1.1.1.2 mrg goto fail;
2436 1.1 mrg }
2437 1.1 mrg }
2438 1.1 mrg #undef DLSYM
2439 1.1 mrg #undef DLSYM_OPT
2440 1.1 mrg
2441 1.1.1.2 mrg return 1;
2442 1.1.1.2 mrg
2443 1.1.1.2 mrg dl_fail:
2444 1.1.1.2 mrg err = dlerror ();
2445 1.1.1.2 mrg fail:
2446 1.1.1.2 mrg gomp_error ("while loading %s: %s", plugin_name, err);
2447 1.1.1.2 mrg if (last_missing)
2448 1.1.1.2 mrg gomp_error ("missing function was %s", last_missing);
2449 1.1.1.2 mrg if (plugin_handle)
2450 1.1.1.2 mrg dlclose (plugin_handle);
2451 1.1.1.2 mrg
2452 1.1.1.2 mrg return 0;
2453 1.1.1.2 mrg }
2454 1.1.1.2 mrg
2455 1.1.1.2 mrg /* This function finalizes all initialized devices. */
2456 1.1.1.2 mrg
2457 1.1.1.2 mrg static void
2458 1.1.1.2 mrg gomp_target_fini (void)
2459 1.1.1.2 mrg {
2460 1.1.1.2 mrg int i;
2461 1.1.1.2 mrg for (i = 0; i < num_devices; i++)
2462 1.1 mrg {
2463 1.1.1.2.2.1 pgoyette bool ret = true;
2464 1.1.1.2 mrg struct gomp_device_descr *devicep = &devices[i];
2465 1.1.1.2 mrg gomp_mutex_lock (&devicep->lock);
2466 1.1.1.2 mrg if (devicep->state == GOMP_DEVICE_INITIALIZED)
2467 1.1.1.2 mrg {
2468 1.1.1.2.2.1 pgoyette ret = devicep->fini_device_func (devicep->target_id);
2469 1.1.1.2 mrg devicep->state = GOMP_DEVICE_FINALIZED;
2470 1.1.1.2 mrg }
2471 1.1.1.2 mrg gomp_mutex_unlock (&devicep->lock);
2472 1.1.1.2.2.1 pgoyette if (!ret)
2473 1.1.1.2.2.1 pgoyette gomp_fatal ("device finalization failed");
2474 1.1 mrg }
2475 1.1 mrg }
2476 1.1 mrg
2477 1.1 mrg /* This function initializes the runtime needed for offloading.
2478 1.1 mrg It parses the list of offload targets and tries to load the plugins for
2479 1.1 mrg these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2480 1.1 mrg will be set, and the array DEVICES initialized, containing descriptors for
2481 1.1 mrg corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2482 1.1 mrg by the others. */
2483 1.1 mrg
2484 1.1 mrg static void
2485 1.1 mrg gomp_target_init (void)
2486 1.1 mrg {
2487 1.1 mrg const char *prefix ="libgomp-plugin-";
2488 1.1 mrg const char *suffix = SONAME_SUFFIX (1);
2489 1.1 mrg const char *cur, *next;
2490 1.1 mrg char *plugin_name;
2491 1.1 mrg int i, new_num_devices;
2492 1.1 mrg
2493 1.1 mrg num_devices = 0;
2494 1.1 mrg devices = NULL;
2495 1.1 mrg
2496 1.1 mrg cur = OFFLOAD_TARGETS;
2497 1.1 mrg if (*cur)
2498 1.1 mrg do
2499 1.1 mrg {
2500 1.1 mrg struct gomp_device_descr current_device;
2501 1.1 mrg
2502 1.1 mrg next = strchr (cur, ',');
2503 1.1 mrg
2504 1.1 mrg plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
2505 1.1 mrg + strlen (prefix) + strlen (suffix));
2506 1.1 mrg if (!plugin_name)
2507 1.1 mrg {
2508 1.1 mrg num_devices = 0;
2509 1.1 mrg break;
2510 1.1 mrg }
2511 1.1 mrg
2512 1.1 mrg strcpy (plugin_name, prefix);
2513 1.1 mrg strncat (plugin_name, cur, next ? next - cur : strlen (cur));
2514 1.1 mrg strcat (plugin_name, suffix);
2515 1.1 mrg
2516 1.1 mrg if (gomp_load_plugin_for_device (¤t_device, plugin_name))
2517 1.1 mrg {
2518 1.1 mrg new_num_devices = current_device.get_num_devices_func ();
2519 1.1 mrg if (new_num_devices >= 1)
2520 1.1 mrg {
2521 1.1 mrg /* Augment DEVICES and NUM_DEVICES. */
2522 1.1 mrg
2523 1.1 mrg devices = realloc (devices, (num_devices + new_num_devices)
2524 1.1 mrg * sizeof (struct gomp_device_descr));
2525 1.1 mrg if (!devices)
2526 1.1 mrg {
2527 1.1 mrg num_devices = 0;
2528 1.1 mrg free (plugin_name);
2529 1.1 mrg break;
2530 1.1 mrg }
2531 1.1 mrg
2532 1.1 mrg current_device.name = current_device.get_name_func ();
2533 1.1 mrg /* current_device.capabilities has already been set. */
2534 1.1 mrg current_device.type = current_device.get_type_func ();
2535 1.1 mrg current_device.mem_map.root = NULL;
2536 1.1.1.2 mrg current_device.state = GOMP_DEVICE_UNINITIALIZED;
2537 1.1 mrg current_device.openacc.data_environ = NULL;
2538 1.1 mrg for (i = 0; i < new_num_devices; i++)
2539 1.1 mrg {
2540 1.1 mrg current_device.target_id = i;
2541 1.1 mrg devices[num_devices] = current_device;
2542 1.1 mrg gomp_mutex_init (&devices[num_devices].lock);
2543 1.1 mrg num_devices++;
2544 1.1 mrg }
2545 1.1 mrg }
2546 1.1 mrg }
2547 1.1 mrg
2548 1.1 mrg free (plugin_name);
2549 1.1 mrg cur = next + 1;
2550 1.1 mrg }
2551 1.1 mrg while (next);
2552 1.1 mrg
2553 1.1 mrg /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2554 1.1 mrg NUM_DEVICES_OPENMP. */
2555 1.1 mrg struct gomp_device_descr *devices_s
2556 1.1 mrg = malloc (num_devices * sizeof (struct gomp_device_descr));
2557 1.1 mrg if (!devices_s)
2558 1.1 mrg {
2559 1.1 mrg num_devices = 0;
2560 1.1 mrg free (devices);
2561 1.1 mrg devices = NULL;
2562 1.1 mrg }
2563 1.1 mrg num_devices_openmp = 0;
2564 1.1 mrg for (i = 0; i < num_devices; i++)
2565 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2566 1.1 mrg devices_s[num_devices_openmp++] = devices[i];
2567 1.1 mrg int num_devices_after_openmp = num_devices_openmp;
2568 1.1 mrg for (i = 0; i < num_devices; i++)
2569 1.1 mrg if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2570 1.1 mrg devices_s[num_devices_after_openmp++] = devices[i];
2571 1.1 mrg free (devices);
2572 1.1 mrg devices = devices_s;
2573 1.1 mrg
2574 1.1 mrg for (i = 0; i < num_devices; i++)
2575 1.1 mrg {
2576 1.1 mrg /* The 'devices' array can be moved (by the realloc call) until we have
2577 1.1 mrg found all the plugins, so registering with the OpenACC runtime (which
2578 1.1 mrg takes a copy of the pointer argument) must be delayed until now. */
2579 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2580 1.1 mrg goacc_register (&devices[i]);
2581 1.1 mrg }
2582 1.1.1.2 mrg
2583 1.1.1.2 mrg if (atexit (gomp_target_fini) != 0)
2584 1.1.1.2 mrg gomp_fatal ("atexit failed");
2585 1.1 mrg }
2586 1.1 mrg
2587 1.1 mrg #else /* PLUGIN_SUPPORT */
2588 1.1 mrg /* If dlfcn.h is unavailable we always fallback to host execution.
2589 1.1 mrg GOMP_target* routines are just stubs for this case. */
2590 1.1 mrg static void
2591 1.1 mrg gomp_target_init (void)
2592 1.1 mrg {
2593 1.1 mrg }
2594 1.1 mrg #endif /* PLUGIN_SUPPORT */
2595