target.c revision 1.1.1.1 1 1.1 mrg /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 1.1 mrg Contributed by Jakub Jelinek <jakub (at) redhat.com>.
3 1.1 mrg
4 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.1 mrg (libgomp).
6 1.1 mrg
7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1 mrg more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.1 mrg /* This file contains the support of offloading. */
27 1.1 mrg
28 1.1 mrg #include "config.h"
29 1.1 mrg #include "libgomp.h"
30 1.1 mrg #include "oacc-plugin.h"
31 1.1 mrg #include "oacc-int.h"
32 1.1 mrg #include "gomp-constants.h"
33 1.1 mrg #include <limits.h>
34 1.1 mrg #include <stdbool.h>
35 1.1 mrg #include <stdlib.h>
36 1.1 mrg #ifdef HAVE_INTTYPES_H
37 1.1 mrg # include <inttypes.h> /* For PRIu64. */
38 1.1 mrg #endif
39 1.1 mrg #include <string.h>
40 1.1 mrg #include <assert.h>
41 1.1 mrg
42 1.1 mrg #ifdef PLUGIN_SUPPORT
43 1.1 mrg #include <dlfcn.h>
44 1.1 mrg #include "plugin-suffix.h"
45 1.1 mrg #endif
46 1.1 mrg
47 1.1 mrg static void gomp_target_init (void);
48 1.1 mrg
49 1.1 mrg /* The whole initialization code for offloading plugins is only run one. */
50 1.1 mrg static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
51 1.1 mrg
52 1.1 mrg /* Mutex for offload image registration. */
53 1.1 mrg static gomp_mutex_t register_lock;
54 1.1 mrg
55 1.1 mrg /* This structure describes an offload image.
56 1.1 mrg It contains type of the target device, pointer to host table descriptor, and
57 1.1 mrg pointer to target data. */
58 1.1 mrg struct offload_image_descr {
59 1.1 mrg enum offload_target_type type;
60 1.1 mrg void *host_table;
61 1.1 mrg void *target_data;
62 1.1 mrg };
63 1.1 mrg
64 1.1 mrg /* Array of descriptors of offload images. */
65 1.1 mrg static struct offload_image_descr *offload_images;
66 1.1 mrg
67 1.1 mrg /* Total number of offload images. */
68 1.1 mrg static int num_offload_images;
69 1.1 mrg
70 1.1 mrg /* Array of descriptors for all available devices. */
71 1.1 mrg static struct gomp_device_descr *devices;
72 1.1 mrg
73 1.1 mrg /* Total number of available devices. */
74 1.1 mrg static int num_devices;
75 1.1 mrg
76 1.1 mrg /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
77 1.1 mrg static int num_devices_openmp;
78 1.1 mrg
79 1.1 mrg /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
80 1.1 mrg
81 1.1 mrg static void *
82 1.1 mrg gomp_realloc_unlock (void *old, size_t size)
83 1.1 mrg {
84 1.1 mrg void *ret = realloc (old, size);
85 1.1 mrg if (ret == NULL)
86 1.1 mrg {
87 1.1 mrg gomp_mutex_unlock (®ister_lock);
88 1.1 mrg gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
89 1.1 mrg }
90 1.1 mrg return ret;
91 1.1 mrg }
92 1.1 mrg
93 1.1 mrg /* The comparison function. */
94 1.1 mrg
95 1.1 mrg attribute_hidden int
96 1.1 mrg splay_compare (splay_tree_key x, splay_tree_key y)
97 1.1 mrg {
98 1.1 mrg if (x->host_start == x->host_end
99 1.1 mrg && y->host_start == y->host_end)
100 1.1 mrg return 0;
101 1.1 mrg if (x->host_end <= y->host_start)
102 1.1 mrg return -1;
103 1.1 mrg if (x->host_start >= y->host_end)
104 1.1 mrg return 1;
105 1.1 mrg return 0;
106 1.1 mrg }
107 1.1 mrg
108 1.1 mrg #include "splay-tree.h"
109 1.1 mrg
/* Run the offload-plugin initialization (gomp_target_init) exactly once,
   even when called concurrently from multiple threads; pthread_once
   guarantees the single execution.  */

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
115 1.1 mrg
/* Return the number of devices usable for OpenMP 4.0 offloading
   (GOMP_OFFLOAD_CAP_OPENMP_400), initializing the plugins on first use.  */

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
122 1.1 mrg
123 1.1 mrg static struct gomp_device_descr *
124 1.1 mrg resolve_device (int device_id)
125 1.1 mrg {
126 1.1 mrg if (device_id == GOMP_DEVICE_ICV)
127 1.1 mrg {
128 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
129 1.1 mrg device_id = icv->default_device_var;
130 1.1 mrg }
131 1.1 mrg
132 1.1 mrg if (device_id < 0 || device_id >= gomp_get_num_devices ())
133 1.1 mrg return NULL;
134 1.1 mrg
135 1.1 mrg return &devices[device_id];
136 1.1 mrg }
137 1.1 mrg
138 1.1 mrg
/* Handle the case where splay_tree_lookup found oldn for newn.
   Helper function of gomp_map_vars.

   A forced (re-)mapping, or a new range that is not fully contained in the
   existing one, is a fatal error; DEVICEP's lock (held by the caller) is
   released before dying.  Otherwise the existing mapping's refcount is
   bumped and it is reused.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, unsigned char kind)
{
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }
  oldn->refcount++;
}
158 1.1 mrg
/* Fetch the mapping kind at index IDX from the KINDS array.  OpenACC
   passes kinds as unsigned shorts, OpenMP as unsigned chars; the caller's
   IS_OPENACC flag selects the element width.  */

static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  if (is_openacc)
    return ((unsigned short *) kinds)[idx];

  return ((unsigned char *) kinds)[idx];
}
165 1.1 mrg
/* Map MAPNUM host regions (HOSTADDRS/SIZES/KINDS) onto device DEVICEP,
   returning a new target_mem_desc.  Runs in two passes: first size the
   device block needed for not-yet-mapped regions (reusing existing splay
   tree entries where possible), then allocate it, insert the new entries,
   and copy data host->device as each kind requires.  DEVADDRS, when
   non-NULL, supplies an already-allocated device block (single mapping
   only).  For IS_TARGET, a pointer table for all MAPNUM entries is placed
   at the start of the device block.  DEVICEP's lock is taken and released
   here; error paths drop it before gomp_fatal.  */

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool is_openacc, bool is_target)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  /* OpenACC kinds are 16-bit (8-bit type mask), OpenMP kinds 8-bit
     (3-bit type mask); the high bits encode log2 of the alignment.  */
  const int rshift = is_openacc ? 8 : 3;
  const int typemask = is_openacc ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    return tgt;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (is_target)
    {
      /* Reserve room at the start of the block for the pointer table.  */
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  /* Pass 1: look every region up in the splay tree; accumulate the size
     and alignment the device block needs for the regions not found.  */
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (is_openacc, kinds, i);
      if (hostaddrs[i] == NULL)
	{
	  tgt->list[i] = NULL;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
	{
	  tgt->list[i] = n;
	  gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
	}
      else
	{
	  tgt->list[i] = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      /* Consume the GOMP_MAP_POINTER entries that immediately
		 follow and point inside this pointer-set region; they are
		 handled together with it (note the i++ skip).  */
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j] = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      /* Caller supplied the device memory; only a single mapping is
	 supported in that mode.  */
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || is_target)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      /* Everything was already mapped; no device block needed.  */
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  /* Pass 2 recomputes offsets from scratch; restart past the pointer
     table when IS_TARGET.  */
  tgt_size = 0;
  if (is_target)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt)
    {
      /* Pass 2: create splay tree nodes for all regions pass 1 did not
	 find, assign device offsets, and perform the host->device copies
	 each mapping kind requires.  */
      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i] == NULL)
	  {
	    int kind = get_kind (is_openacc, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n)
	      {
		/* A pointer-set skip in pass 1 may have left entries that
		   were mapped later in that pass; reuse them here.  */
		tgt->list[i] = n;
		gomp_map_vars_existing (devicep, n, k, kind & typemask);
	      }
	    else
	      {
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i] = k;
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		k->tgt = tgt;
		k->tgt_offset = tgt_size;
		tgt_size += k->host_end - k->host_start;
		k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		k->refcount = 1;
		k->async_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		    /* No host->device copy on mapping; data flows (if at
		       all) on unmap.  */
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		    /* FIXME: Perhaps add some smarts, like if copying
		       several adjacent fields from host to target, use some
		       host buffer to avoid sending each var individually.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);
		    break;
		  case GOMP_MAP_POINTER:
		    /* Install, on the device, a pointer to the device copy
		       of whatever the host pointer refers to.  */
		    cur_node.host_start
		      = (uintptr_t) *(void **) k->host_start;
		    if (cur_node.host_start == (uintptr_t) NULL)
		      {
			cur_node.tgt_offset = (uintptr_t) NULL;
			/* FIXME: see above FIXME comment.  */
			devicep->host2dev_func (devicep->target_id,
						(void *) (tgt->tgt_start
							  + k->tgt_offset),
						(void *) &cur_node.tgt_offset,
						sizeof (void *));
			break;
		      }
		    /* Add bias to the pointer value.  */
		    cur_node.host_start += sizes[i];
		    cur_node.host_end = cur_node.host_start + 1;
		    n = splay_tree_lookup (mem_map, &cur_node);
		    if (n == NULL)
		      {
			/* Could be possibly zero size array section.  */
			cur_node.host_end--;
			n = splay_tree_lookup (mem_map, &cur_node);
			if (n == NULL)
			  {
			    cur_node.host_start--;
			    n = splay_tree_lookup (mem_map, &cur_node);
			    cur_node.host_start++;
			  }
		      }
		    if (n == NULL)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("Pointer target of array section "
				    "wasn't mapped");
		      }
		    cur_node.host_start -= n->host_start;
		    cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
					  + cur_node.host_start;
		    /* At this point tgt_offset is target address of the
		       array section.  Now subtract bias to get what we want
		       to initialize the pointer with.  */
		    cur_node.tgt_offset -= sizes[i];
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) &cur_node.tgt_offset,
					    sizeof (void *));
		    break;
		  case GOMP_MAP_TO_PSET:
		    /* Copy the pointer-set block itself, then fix up each
		       trailing GOMP_MAP_POINTER entry that points into it
		       (mirrors the pass-1 skip logic).  */
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j] = k;
			  k->refcount++;
			  cur_node.host_start
			    = (uintptr_t) *(void **) hostaddrs[j];
			  if (cur_node.host_start == (uintptr_t) NULL)
			    {
			      cur_node.tgt_offset = (uintptr_t) NULL;
			      /* FIXME: see above FIXME comment.  */
			      devicep->host2dev_func (devicep->target_id,
				 (void *) (tgt->tgt_start + k->tgt_offset
					   + ((uintptr_t) hostaddrs[j]
					      - k->host_start)),
				 (void *) &cur_node.tgt_offset,
				 sizeof (void *));
			      i++;
			      continue;
			    }
			  /* Add bias to the pointer value.  */
			  cur_node.host_start += sizes[j];
			  cur_node.host_end = cur_node.host_start + 1;
			  n = splay_tree_lookup (mem_map, &cur_node);
			  if (n == NULL)
			    {
			      /* Could be possibly zero size array section.  */
			      cur_node.host_end--;
			      n = splay_tree_lookup (mem_map, &cur_node);
			      if (n == NULL)
				{
				  cur_node.host_start--;
				  n = splay_tree_lookup (mem_map, &cur_node);
				  cur_node.host_start++;
				}
			    }
			  if (n == NULL)
			    {
			      gomp_mutex_unlock (&devicep->lock);
			      gomp_fatal ("Pointer target of array section "
					  "wasn't mapped");
			    }
			  cur_node.host_start -= n->host_start;
			  cur_node.tgt_offset = n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start;
			  /* At this point tgt_offset is target address of the
			     array section.  Now subtract bias to get what we
			     want to initialize the pointer with.  */
			  cur_node.tgt_offset -= sizes[j];
			  /* FIXME: see above FIXME comment.  */
			  devicep->host2dev_func (devicep->target_id,
			     (void *) (tgt->tgt_start + k->tgt_offset
				       + ((uintptr_t) hostaddrs[j]
					  - k->host_start)),
			     (void *) &cur_node.tgt_offset,
			     sizeof (void *));
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));

		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    sizeof (void *));
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }
		array++;
	      }
	  }
    }

  if (is_target)
    {
      /* Fill in the device-side pointer table at the start of the block:
	 one device address (or NULL) per mapping.  */
      for (i = 0; i < mapnum; i++)
	{
	  if (tgt->list[i] == NULL)
	    cur_node.tgt_offset = (uintptr_t) NULL;
	  else
	    cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
				  + tgt->list[i]->tgt_offset;
	  /* FIXME: see above FIXME comment.  */
	  devicep->host2dev_func (devicep->target_id,
				  (void *) (tgt->tgt_start
					    + i * sizeof (void *)),
				  (void *) &cur_node.tgt_offset,
				  sizeof (void *));
	}
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
521 1.1 mrg
522 1.1 mrg static void
523 1.1 mrg gomp_unmap_tgt (struct target_mem_desc *tgt)
524 1.1 mrg {
525 1.1 mrg /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
526 1.1 mrg if (tgt->tgt_end)
527 1.1 mrg tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
528 1.1 mrg
529 1.1 mrg free (tgt->array);
530 1.1 mrg free (tgt);
531 1.1 mrg }
532 1.1 mrg
/* Decrease the refcount for a set of mapped variables, and queue asychronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      /* Entry was never mapped (NULL host address); nothing to do.  */
      ;
    else if (tgt->list[i]->refcount > 1)
      {
	/* Still referenced elsewhere: move one reference from the normal
	   count to the async count so gomp_unmap_vars defers the unmap.  */
	tgt->list[i]->refcount--;
	tgt->list[i]->async_refcount++;
      }
    else
      {
	/* Last reference: issue the device->host copy now, for regions
	   that were mapped with a copy-from kind.  */
	splay_tree_key k = tgt->list[i];
	if (k->copy_from)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}
566 1.1 mrg
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* Nothing was mapped; just free the descriptor.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      /* Entry was never mapped (NULL host address).  */
      ;
    else if (tgt->list[i]->refcount > 1)
      /* Still referenced by another mapping; just drop one reference.  */
      tgt->list[i]->refcount--;
    else if (tgt->list[i]->async_refcount > 0)
      /* An async copy-back is in flight (see gomp_copy_from_async);
	 consume the async reference instead of unmapping.  */
      tgt->list[i]->async_refcount--;
    else
      {
	/* Last reference: copy back if requested, then remove the region
	   from the address map and possibly free its backing block.  */
	splay_tree_key k = tgt->list[i];
	if (k->copy_from && do_copyfrom)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
	splay_tree_remove (&devicep->mem_map, k);
	if (k->tgt->refcount > 1)
	  k->tgt->refcount--;
	else
	  gomp_unmap_tgt (k->tgt);
      }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
613 1.1 mrg
/* Implement "target update": for each of the MAPNUM regions in
   HOSTADDRS/SIZES with a non-zero size, refresh the device copy from the
   host (copy-to kinds) and/or the host copy from the device (copy-from
   kinds), according to KINDS.  Every region must already be mapped and
   fully contained in its existing mapping, otherwise this is fatal.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool is_openacc)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  /* OpenACC kinds use an 8-bit type mask, OpenMP a 3-bit one.  */
  const int typemask = is_openacc ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (is_openacc, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }
	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      devicep->host2dev_func (devicep->target_id,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      (void *) cur_node.host_start,
				      cur_node.host_end - cur_node.host_start);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      devicep->dev2host_func (devicep->target_id,
				      (void *) cur_node.host_start,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      cur_node.host_end - cur_node.host_start);
	  }
	else
	  {
	    gomp_mutex_unlock (&devicep->lock);
	    gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
			(void *) cur_node.host_start,
			(void *) cur_node.host_end);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
676 1.1 mrg
677 1.1 mrg /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
678 1.1 mrg And insert to splay tree the mapping between addresses from HOST_TABLE and
679 1.1 mrg from loaded target image. */
680 1.1 mrg
681 1.1 mrg static void
682 1.1 mrg gomp_offload_image_to_device (struct gomp_device_descr *devicep,
683 1.1 mrg void *host_table, void *target_data,
684 1.1 mrg bool is_register_lock)
685 1.1 mrg {
686 1.1 mrg void **host_func_table = ((void ***) host_table)[0];
687 1.1 mrg void **host_funcs_end = ((void ***) host_table)[1];
688 1.1 mrg void **host_var_table = ((void ***) host_table)[2];
689 1.1 mrg void **host_vars_end = ((void ***) host_table)[3];
690 1.1 mrg
691 1.1 mrg /* The func table contains only addresses, the var table contains addresses
692 1.1 mrg and corresponding sizes. */
693 1.1 mrg int num_funcs = host_funcs_end - host_func_table;
694 1.1 mrg int num_vars = (host_vars_end - host_var_table) / 2;
695 1.1 mrg
696 1.1 mrg /* Load image to device and get target addresses for the image. */
697 1.1 mrg struct addr_pair *target_table = NULL;
698 1.1 mrg int i, num_target_entries
699 1.1 mrg = devicep->load_image_func (devicep->target_id, target_data, &target_table);
700 1.1 mrg
701 1.1 mrg if (num_target_entries != num_funcs + num_vars)
702 1.1 mrg {
703 1.1 mrg gomp_mutex_unlock (&devicep->lock);
704 1.1 mrg if (is_register_lock)
705 1.1 mrg gomp_mutex_unlock (®ister_lock);
706 1.1 mrg gomp_fatal ("Can't map target functions or variables");
707 1.1 mrg }
708 1.1 mrg
709 1.1 mrg /* Insert host-target address mapping into splay tree. */
710 1.1 mrg struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
711 1.1 mrg tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
712 1.1 mrg tgt->refcount = 1;
713 1.1 mrg tgt->tgt_start = 0;
714 1.1 mrg tgt->tgt_end = 0;
715 1.1 mrg tgt->to_free = NULL;
716 1.1 mrg tgt->prev = NULL;
717 1.1 mrg tgt->list_count = 0;
718 1.1 mrg tgt->device_descr = devicep;
719 1.1 mrg splay_tree_node array = tgt->array;
720 1.1 mrg
721 1.1 mrg for (i = 0; i < num_funcs; i++)
722 1.1 mrg {
723 1.1 mrg splay_tree_key k = &array->key;
724 1.1 mrg k->host_start = (uintptr_t) host_func_table[i];
725 1.1 mrg k->host_end = k->host_start + 1;
726 1.1 mrg k->tgt = tgt;
727 1.1 mrg k->tgt_offset = target_table[i].start;
728 1.1 mrg k->refcount = 1;
729 1.1 mrg k->async_refcount = 0;
730 1.1 mrg k->copy_from = false;
731 1.1 mrg array->left = NULL;
732 1.1 mrg array->right = NULL;
733 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
734 1.1 mrg array++;
735 1.1 mrg }
736 1.1 mrg
737 1.1 mrg for (i = 0; i < num_vars; i++)
738 1.1 mrg {
739 1.1 mrg struct addr_pair *target_var = &target_table[num_funcs + i];
740 1.1 mrg if (target_var->end - target_var->start
741 1.1 mrg != (uintptr_t) host_var_table[i * 2 + 1])
742 1.1 mrg {
743 1.1 mrg gomp_mutex_unlock (&devicep->lock);
744 1.1 mrg if (is_register_lock)
745 1.1 mrg gomp_mutex_unlock (®ister_lock);
746 1.1 mrg gomp_fatal ("Can't map target variables (size mismatch)");
747 1.1 mrg }
748 1.1 mrg
749 1.1 mrg splay_tree_key k = &array->key;
750 1.1 mrg k->host_start = (uintptr_t) host_var_table[i * 2];
751 1.1 mrg k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
752 1.1 mrg k->tgt = tgt;
753 1.1 mrg k->tgt_offset = target_var->start;
754 1.1 mrg k->refcount = 1;
755 1.1 mrg k->async_refcount = 0;
756 1.1 mrg k->copy_from = false;
757 1.1 mrg array->left = NULL;
758 1.1 mrg array->right = NULL;
759 1.1 mrg splay_tree_insert (&devicep->mem_map, array);
760 1.1 mrg array++;
761 1.1 mrg }
762 1.1 mrg
763 1.1 mrg free (target_table);
764 1.1 mrg }
765 1.1 mrg
766 1.1 mrg /* This function should be called from every offload image while loading.
767 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
768 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
769 1.1 mrg
770 1.1 mrg void
771 1.1 mrg GOMP_offload_register (void *host_table, enum offload_target_type target_type,
772 1.1 mrg void *target_data)
773 1.1 mrg {
774 1.1 mrg int i;
775 1.1 mrg gomp_mutex_lock (®ister_lock);
776 1.1 mrg
777 1.1 mrg /* Load image to all initialized devices. */
778 1.1 mrg for (i = 0; i < num_devices; i++)
779 1.1 mrg {
780 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
781 1.1 mrg gomp_mutex_lock (&devicep->lock);
782 1.1 mrg if (devicep->type == target_type && devicep->is_initialized)
783 1.1 mrg gomp_offload_image_to_device (devicep, host_table, target_data, true);
784 1.1 mrg gomp_mutex_unlock (&devicep->lock);
785 1.1 mrg }
786 1.1 mrg
787 1.1 mrg /* Insert image to array of pending images. */
788 1.1 mrg offload_images
789 1.1 mrg = gomp_realloc_unlock (offload_images,
790 1.1 mrg (num_offload_images + 1)
791 1.1 mrg * sizeof (struct offload_image_descr));
792 1.1 mrg offload_images[num_offload_images].type = target_type;
793 1.1 mrg offload_images[num_offload_images].host_table = host_table;
794 1.1 mrg offload_images[num_offload_images].target_data = target_data;
795 1.1 mrg
796 1.1 mrg num_offload_images++;
797 1.1 mrg gomp_mutex_unlock (®ister_lock);
798 1.1 mrg }
799 1.1 mrg
800 1.1 mrg /* This function should be called from every offload image while unloading.
801 1.1 mrg It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
802 1.1 mrg the target, and TARGET_DATA needed by target plugin. */
803 1.1 mrg
804 1.1 mrg void
805 1.1 mrg GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
806 1.1 mrg void *target_data)
807 1.1 mrg {
808 1.1 mrg void **host_func_table = ((void ***) host_table)[0];
809 1.1 mrg void **host_funcs_end = ((void ***) host_table)[1];
810 1.1 mrg void **host_var_table = ((void ***) host_table)[2];
811 1.1 mrg void **host_vars_end = ((void ***) host_table)[3];
812 1.1 mrg int i;
813 1.1 mrg
814 1.1 mrg /* The func table contains only addresses, the var table contains addresses
815 1.1 mrg and corresponding sizes. */
816 1.1 mrg int num_funcs = host_funcs_end - host_func_table;
817 1.1 mrg int num_vars = (host_vars_end - host_var_table) / 2;
818 1.1 mrg
819 1.1 mrg gomp_mutex_lock (®ister_lock);
820 1.1 mrg
821 1.1 mrg /* Unload image from all initialized devices. */
822 1.1 mrg for (i = 0; i < num_devices; i++)
823 1.1 mrg {
824 1.1 mrg int j;
825 1.1 mrg struct gomp_device_descr *devicep = &devices[i];
826 1.1 mrg gomp_mutex_lock (&devicep->lock);
827 1.1 mrg if (devicep->type != target_type || !devicep->is_initialized)
828 1.1 mrg {
829 1.1 mrg gomp_mutex_unlock (&devicep->lock);
830 1.1 mrg continue;
831 1.1 mrg }
832 1.1 mrg
833 1.1 mrg devicep->unload_image_func (devicep->target_id, target_data);
834 1.1 mrg
835 1.1 mrg /* Remove mapping from splay tree. */
836 1.1 mrg struct splay_tree_key_s k;
837 1.1 mrg splay_tree_key node = NULL;
838 1.1 mrg if (num_funcs > 0)
839 1.1 mrg {
840 1.1 mrg k.host_start = (uintptr_t) host_func_table[0];
841 1.1 mrg k.host_end = k.host_start + 1;
842 1.1 mrg node = splay_tree_lookup (&devicep->mem_map, &k);
843 1.1 mrg }
844 1.1 mrg else if (num_vars > 0)
845 1.1 mrg {
846 1.1 mrg k.host_start = (uintptr_t) host_var_table[0];
847 1.1 mrg k.host_end = k.host_start + (uintptr_t) host_var_table[1];
848 1.1 mrg node = splay_tree_lookup (&devicep->mem_map, &k);
849 1.1 mrg }
850 1.1 mrg
851 1.1 mrg for (j = 0; j < num_funcs; j++)
852 1.1 mrg {
853 1.1 mrg k.host_start = (uintptr_t) host_func_table[j];
854 1.1 mrg k.host_end = k.host_start + 1;
855 1.1 mrg splay_tree_remove (&devicep->mem_map, &k);
856 1.1 mrg }
857 1.1 mrg
858 1.1 mrg for (j = 0; j < num_vars; j++)
859 1.1 mrg {
860 1.1 mrg k.host_start = (uintptr_t) host_var_table[j * 2];
861 1.1 mrg k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
862 1.1 mrg splay_tree_remove (&devicep->mem_map, &k);
863 1.1 mrg }
864 1.1 mrg
865 1.1 mrg if (node)
866 1.1 mrg {
867 1.1 mrg free (node->tgt);
868 1.1 mrg free (node);
869 1.1 mrg }
870 1.1 mrg
871 1.1 mrg gomp_mutex_unlock (&devicep->lock);
872 1.1 mrg }
873 1.1 mrg
874 1.1 mrg /* Remove image from array of pending images. */
875 1.1 mrg for (i = 0; i < num_offload_images; i++)
876 1.1 mrg if (offload_images[i].target_data == target_data)
877 1.1 mrg {
878 1.1 mrg offload_images[i] = offload_images[--num_offload_images];
879 1.1 mrg break;
880 1.1 mrg }
881 1.1 mrg
882 1.1 mrg gomp_mutex_unlock (®ister_lock);
883 1.1 mrg }
884 1.1 mrg
885 1.1 mrg /* This function initializes the target device, specified by DEVICEP. DEVICEP
886 1.1 mrg must be locked on entry, and remains locked on return. */
887 1.1 mrg
888 1.1 mrg attribute_hidden void
889 1.1 mrg gomp_init_device (struct gomp_device_descr *devicep)
890 1.1 mrg {
891 1.1 mrg int i;
892 1.1 mrg devicep->init_device_func (devicep->target_id);
893 1.1 mrg
894 1.1 mrg /* Load to device all images registered by the moment. */
895 1.1 mrg for (i = 0; i < num_offload_images; i++)
896 1.1 mrg {
897 1.1 mrg struct offload_image_descr *image = &offload_images[i];
898 1.1 mrg if (image->type == devicep->type)
899 1.1 mrg gomp_offload_image_to_device (devicep, image->host_table,
900 1.1 mrg image->target_data, false);
901 1.1 mrg }
902 1.1 mrg
903 1.1 mrg devicep->is_initialized = true;
904 1.1 mrg }
905 1.1 mrg
906 1.1 mrg /* Free address mapping tables. MM must be locked on entry, and remains locked
907 1.1 mrg on return. */
908 1.1 mrg
909 1.1 mrg attribute_hidden void
910 1.1 mrg gomp_free_memmap (struct splay_tree_s *mem_map)
911 1.1 mrg {
912 1.1 mrg while (mem_map->root)
913 1.1 mrg {
914 1.1 mrg struct target_mem_desc *tgt = mem_map->root->key.tgt;
915 1.1 mrg
916 1.1 mrg splay_tree_remove (mem_map, &mem_map->root->key);
917 1.1 mrg free (tgt->array);
918 1.1 mrg free (tgt);
919 1.1 mrg }
920 1.1 mrg }
921 1.1 mrg
922 1.1 mrg /* This function de-initializes the target device, specified by DEVICEP.
923 1.1 mrg DEVICEP must be locked on entry, and remains locked on return. */
924 1.1 mrg
925 1.1 mrg attribute_hidden void
926 1.1 mrg gomp_fini_device (struct gomp_device_descr *devicep)
927 1.1 mrg {
928 1.1 mrg if (devicep->is_initialized)
929 1.1 mrg devicep->fini_device_func (devicep->target_id);
930 1.1 mrg
931 1.1 mrg devicep->is_initialized = false;
932 1.1 mrg }
933 1.1 mrg
934 1.1 mrg /* Called when encountering a target directive. If DEVICE
935 1.1 mrg is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
936 1.1 mrg GOMP_DEVICE_HOST_FALLBACK (or any value
937 1.1 mrg larger than last available hw device), use host fallback.
938 1.1 mrg FN is address of host code, UNUSED is part of the current ABI, but
939 1.1 mrg we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
940 1.1 mrg with MAPNUM entries, with addresses of the host objects,
941 1.1 mrg sizes of the host objects (resp. for pointer kind pointer bias
942 1.1 mrg and assumed sizeof (void *) size) and kinds. */
943 1.1 mrg
void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  /* Resolve DEVICE (GOMP_DEVICE_ICV / GOMP_DEVICE_HOST_FALLBACK / index)
     to a device descriptor; NULL means host fallback.  */
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  Save the current thread state, clear it so FN runs
	 with a pristine gomp_thread (only the place/partition info is kept
	 when a places list is active), then restore the saved state.  */
      struct gomp_thread old_thr, *thr = gomp_thread ();
      old_thr = *thr;
      memset (thr, '\0', sizeof (*thr));
      if (gomp_places_list)
	{
	  thr->place = old_thr.place;
	  thr->ts.place_partition_len = gomp_places_list_len;
	}
      fn (hostaddrs);
      /* Release any resources FN's team acquired before restoring.  */
      gomp_free_thread (thr);
      *thr = old_thr;
      return;
    }

  /* Lazily initialize the device under its lock.  */
  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  void *fn_addr;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    /* Device executes host code directly; no address translation needed.  */
    fn_addr = (void *) fn;
  else
    {
      /* Translate the host function address to the device one via the
	 memory map; functions are registered as 1-byte ranges.  */
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      if (tgt_fn == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Target function wasn't mapped");
	}
      gomp_mutex_unlock (&devicep->lock);

      fn_addr = (void *) tgt_fn->tgt_offset;
    }

  /* Map all MAPNUM host objects to the device for the duration of the
     target region.  */
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     true);
  /* Same clear-run-restore dance as in the host fallback above, but the
     body runs on the device via the plugin's run hook.  */
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  /* Copy back / unmap everything mapped for this region.  */
  gomp_unmap_vars (tgt_vars, true);
}
1011 1.1 mrg
1012 1.1 mrg void
1013 1.1 mrg GOMP_target_data (int device, const void *unused, size_t mapnum,
1014 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
1015 1.1 mrg {
1016 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1017 1.1 mrg
1018 1.1 mrg if (devicep == NULL
1019 1.1 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1020 1.1 mrg {
1021 1.1 mrg /* Host fallback. */
1022 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
1023 1.1 mrg if (icv->target_data)
1024 1.1 mrg {
1025 1.1 mrg /* Even when doing a host fallback, if there are any active
1026 1.1 mrg #pragma omp target data constructs, need to remember the
1027 1.1 mrg new #pragma omp target data, otherwise GOMP_target_end_data
1028 1.1 mrg would get out of sync. */
1029 1.1 mrg struct target_mem_desc *tgt
1030 1.1 mrg = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
1031 1.1 mrg tgt->prev = icv->target_data;
1032 1.1 mrg icv->target_data = tgt;
1033 1.1 mrg }
1034 1.1 mrg return;
1035 1.1 mrg }
1036 1.1 mrg
1037 1.1 mrg gomp_mutex_lock (&devicep->lock);
1038 1.1 mrg if (!devicep->is_initialized)
1039 1.1 mrg gomp_init_device (devicep);
1040 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1041 1.1 mrg
1042 1.1 mrg struct target_mem_desc *tgt
1043 1.1 mrg = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1044 1.1 mrg false);
1045 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
1046 1.1 mrg tgt->prev = icv->target_data;
1047 1.1 mrg icv->target_data = tgt;
1048 1.1 mrg }
1049 1.1 mrg
1050 1.1 mrg void
1051 1.1 mrg GOMP_target_end_data (void)
1052 1.1 mrg {
1053 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false);
1054 1.1 mrg if (icv->target_data)
1055 1.1 mrg {
1056 1.1 mrg struct target_mem_desc *tgt = icv->target_data;
1057 1.1 mrg icv->target_data = tgt->prev;
1058 1.1 mrg gomp_unmap_vars (tgt, true);
1059 1.1 mrg }
1060 1.1 mrg }
1061 1.1 mrg
1062 1.1 mrg void
1063 1.1 mrg GOMP_target_update (int device, const void *unused, size_t mapnum,
1064 1.1 mrg void **hostaddrs, size_t *sizes, unsigned char *kinds)
1065 1.1 mrg {
1066 1.1 mrg struct gomp_device_descr *devicep = resolve_device (device);
1067 1.1 mrg
1068 1.1 mrg if (devicep == NULL
1069 1.1 mrg || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1070 1.1 mrg return;
1071 1.1 mrg
1072 1.1 mrg gomp_mutex_lock (&devicep->lock);
1073 1.1 mrg if (!devicep->is_initialized)
1074 1.1 mrg gomp_init_device (devicep);
1075 1.1 mrg gomp_mutex_unlock (&devicep->lock);
1076 1.1 mrg
1077 1.1 mrg gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1078 1.1 mrg }
1079 1.1 mrg
1080 1.1 mrg void
1081 1.1 mrg GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1082 1.1 mrg {
1083 1.1 mrg if (thread_limit)
1084 1.1 mrg {
1085 1.1 mrg struct gomp_task_icv *icv = gomp_icv (true);
1086 1.1 mrg icv->thread_limit_var
1087 1.1 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1088 1.1 mrg }
1089 1.1 mrg (void) num_teams;
1090 1.1 mrg }
1091 1.1 mrg
1092 1.1 mrg #ifdef PLUGIN_SUPPORT
1093 1.1 mrg
1094 1.1 mrg /* This function tries to load a plugin for DEVICE. Name of plugin is passed
1095 1.1 mrg in PLUGIN_NAME.
1096 1.1 mrg The handles of the found functions are stored in the corresponding fields
1097 1.1 mrg of DEVICE. The function returns TRUE on success and FALSE otherwise. */
1098 1.1 mrg
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  /* ERR records the first fatal problem; LAST_MISSING names the most
     recently absent optional symbol, for diagnostics.  */
  const char *err = NULL, *last_missing = NULL;
  int optional_present, optional_total;

  /* Clear any existing error.  */
  dlerror ();

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    {
      err = dlerror ();
      goto out;
    }

  /* Check if all required functions are available in the plugin and store
     their handlers.  dlsym may legitimately return NULL, so failure is
     detected via dlerror rather than the return value.  */
#define DLSYM(f)							\
  do									\
    {									\
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f);	\
      err = dlerror ();							\
      if (err != NULL)							\
	goto out;							\
    }									\
  while (0)
  /* Similar, but missing functions are not an error; they are tallied in
     OPTIONAL_PRESENT / OPTIONAL_TOTAL so callers can require all-or-none
     groups of optional entry points.  */
#define DLSYM_OPT(f, n)							\
  do									\
    {									\
      const char *tmp_err;						\
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n);	\
      tmp_err = dlerror ();						\
      if (tmp_err == NULL)						\
	optional_present++;						\
      else								\
	last_missing = #n;						\
      optional_total++;							\
    }									\
  while (0)

  /* Entry points every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  /* Capability-dependent entry points.  */
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    DLSYM (run);
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.exec, openacc_parallel);
      DLSYM_OPT (openacc.register_async_cleanup,
		 openacc_register_async_cleanup);
      DLSYM_OPT (openacc.async_test, openacc_async_test);
      DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
      DLSYM_OPT (openacc.async_wait, openacc_async_wait);
      DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
      DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
      DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
      DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
      DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
      DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
      /* Require all the OpenACC handlers if we have
	 GOMP_OFFLOAD_CAP_OPENACC_200.  */
      if (optional_present != optional_total)
	{
	  err = "plugin missing OpenACC handler function";
	  goto out;
	}
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.cuda.get_current_device,
		 openacc_get_current_cuda_device);
      DLSYM_OPT (openacc.cuda.get_current_context,
		 openacc_get_current_cuda_context);
      DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
      DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
      /* Make sure all the CUDA functions are there if any of them are.  */
      if (optional_present && optional_present != optional_total)
	{
	  err = "plugin missing OpenACC CUDA handler function";
	  goto out;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

 out:
  /* On failure, report the problem (naming the missing optional symbol
     when one was recorded) and drop the plugin handle.  */
  if (err != NULL)
    {
      gomp_error ("while loading %s: %s", plugin_name, err);
      if (last_missing)
	gomp_error ("missing function was %s", last_missing);
      if (plugin_handle)
	dlclose (plugin_handle);
    }
  return err == NULL;
}
1207 1.1 mrg
1208 1.1 mrg /* This function initializes the runtime needed for offloading.
1209 1.1 mrg It parses the list of offload targets and tries to load the plugins for
1210 1.1 mrg these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1211 1.1 mrg will be set, and the array DEVICES initialized, containing descriptors for
1212 1.1 mrg corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1213 1.1 mrg by the others. */
1214 1.1 mrg
1215 1.1 mrg static void
1216 1.1 mrg gomp_target_init (void)
1217 1.1 mrg {
1218 1.1 mrg const char *prefix ="libgomp-plugin-";
1219 1.1 mrg const char *suffix = SONAME_SUFFIX (1);
1220 1.1 mrg const char *cur, *next;
1221 1.1 mrg char *plugin_name;
1222 1.1 mrg int i, new_num_devices;
1223 1.1 mrg
1224 1.1 mrg num_devices = 0;
1225 1.1 mrg devices = NULL;
1226 1.1 mrg
1227 1.1 mrg cur = OFFLOAD_TARGETS;
1228 1.1 mrg if (*cur)
1229 1.1 mrg do
1230 1.1 mrg {
1231 1.1 mrg struct gomp_device_descr current_device;
1232 1.1 mrg
1233 1.1 mrg next = strchr (cur, ',');
1234 1.1 mrg
1235 1.1 mrg plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1236 1.1 mrg + strlen (prefix) + strlen (suffix));
1237 1.1 mrg if (!plugin_name)
1238 1.1 mrg {
1239 1.1 mrg num_devices = 0;
1240 1.1 mrg break;
1241 1.1 mrg }
1242 1.1 mrg
1243 1.1 mrg strcpy (plugin_name, prefix);
1244 1.1 mrg strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1245 1.1 mrg strcat (plugin_name, suffix);
1246 1.1 mrg
1247 1.1 mrg if (gomp_load_plugin_for_device (¤t_device, plugin_name))
1248 1.1 mrg {
1249 1.1 mrg new_num_devices = current_device.get_num_devices_func ();
1250 1.1 mrg if (new_num_devices >= 1)
1251 1.1 mrg {
1252 1.1 mrg /* Augment DEVICES and NUM_DEVICES. */
1253 1.1 mrg
1254 1.1 mrg devices = realloc (devices, (num_devices + new_num_devices)
1255 1.1 mrg * sizeof (struct gomp_device_descr));
1256 1.1 mrg if (!devices)
1257 1.1 mrg {
1258 1.1 mrg num_devices = 0;
1259 1.1 mrg free (plugin_name);
1260 1.1 mrg break;
1261 1.1 mrg }
1262 1.1 mrg
1263 1.1 mrg current_device.name = current_device.get_name_func ();
1264 1.1 mrg /* current_device.capabilities has already been set. */
1265 1.1 mrg current_device.type = current_device.get_type_func ();
1266 1.1 mrg current_device.mem_map.root = NULL;
1267 1.1 mrg current_device.is_initialized = false;
1268 1.1 mrg current_device.openacc.data_environ = NULL;
1269 1.1 mrg for (i = 0; i < new_num_devices; i++)
1270 1.1 mrg {
1271 1.1 mrg current_device.target_id = i;
1272 1.1 mrg devices[num_devices] = current_device;
1273 1.1 mrg gomp_mutex_init (&devices[num_devices].lock);
1274 1.1 mrg num_devices++;
1275 1.1 mrg }
1276 1.1 mrg }
1277 1.1 mrg }
1278 1.1 mrg
1279 1.1 mrg free (plugin_name);
1280 1.1 mrg cur = next + 1;
1281 1.1 mrg }
1282 1.1 mrg while (next);
1283 1.1 mrg
1284 1.1 mrg /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1285 1.1 mrg NUM_DEVICES_OPENMP. */
1286 1.1 mrg struct gomp_device_descr *devices_s
1287 1.1 mrg = malloc (num_devices * sizeof (struct gomp_device_descr));
1288 1.1 mrg if (!devices_s)
1289 1.1 mrg {
1290 1.1 mrg num_devices = 0;
1291 1.1 mrg free (devices);
1292 1.1 mrg devices = NULL;
1293 1.1 mrg }
1294 1.1 mrg num_devices_openmp = 0;
1295 1.1 mrg for (i = 0; i < num_devices; i++)
1296 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1297 1.1 mrg devices_s[num_devices_openmp++] = devices[i];
1298 1.1 mrg int num_devices_after_openmp = num_devices_openmp;
1299 1.1 mrg for (i = 0; i < num_devices; i++)
1300 1.1 mrg if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1301 1.1 mrg devices_s[num_devices_after_openmp++] = devices[i];
1302 1.1 mrg free (devices);
1303 1.1 mrg devices = devices_s;
1304 1.1 mrg
1305 1.1 mrg for (i = 0; i < num_devices; i++)
1306 1.1 mrg {
1307 1.1 mrg /* The 'devices' array can be moved (by the realloc call) until we have
1308 1.1 mrg found all the plugins, so registering with the OpenACC runtime (which
1309 1.1 mrg takes a copy of the pointer argument) must be delayed until now. */
1310 1.1 mrg if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1311 1.1 mrg goacc_register (&devices[i]);
1312 1.1 mrg }
1313 1.1 mrg }
1314 1.1 mrg
1315 1.1 mrg #else /* PLUGIN_SUPPORT */
1316 1.1 mrg /* If dlfcn.h is unavailable we always fallback to host execution.
1317 1.1 mrg GOMP_target* routines are just stubs for this case. */
static void
gomp_target_init (void)
{
  /* Without plugin support there are no offload devices to discover.  */
}
1322 1.1 mrg #endif /* PLUGIN_SUPPORT */
1323