1 1.7 mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg Contributed by Mentor Embedded. 4 1.1 mrg 5 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library 6 1.1 mrg (libgomp). 7 1.1 mrg 8 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it 9 1.1 mrg under the terms of the GNU General Public License as published by 10 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 11 1.1 mrg any later version. 12 1.1 mrg 13 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 14 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 16 1.1 mrg more details. 17 1.1 mrg 18 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 19 1.1 mrg permissions described in the GCC Runtime Library Exception, version 20 1.1 mrg 3.1, as published by the Free Software Foundation. 21 1.1 mrg 22 1.1 mrg You should have received a copy of the GNU General Public License and 23 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 24 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 25 1.1 mrg <http://www.gnu.org/licenses/>. */ 26 1.1 mrg 27 1.1 mrg /* This file handles OpenACC constructs. */ 28 1.1 mrg 29 1.1 mrg #include "openacc.h" 30 1.1 mrg #include "libgomp.h" 31 1.1 mrg #include "gomp-constants.h" 32 1.1 mrg #include "oacc-int.h" 33 1.1 mrg #ifdef HAVE_INTTYPES_H 34 1.1 mrg # include <inttypes.h> /* For PRIu64. */ 35 1.1 mrg #endif 36 1.1 mrg #include <string.h> 37 1.1 mrg #include <stdarg.h> 38 1.1 mrg #include <assert.h> 39 1.1 mrg 40 1.5 mrg 41 1.5 mrg /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we 42 1.5 mrg continue to support the following two legacy values. */ 43 1.5 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0, 44 1.5 mrg "legacy GOMP_DEVICE_ICV broken"); 45 1.5 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) 46 1.5 mrg == GOACC_FLAG_HOST_FALLBACK, 47 1.5 mrg "legacy GOMP_DEVICE_HOST_FALLBACK broken"); 48 1.5 mrg 49 1.5 mrg 50 1.5 mrg /* Handle the mapping pair that are presented when a 51 1.5 mrg deviceptr clause is used with Fortran. */ 52 1.5 mrg 53 1.5 mrg static void 54 1.5 mrg handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, 55 1.5 mrg unsigned short *kinds) 56 1.5 mrg { 57 1.5 mrg int i; 58 1.5 mrg 59 1.5 mrg for (i = 0; i < mapnum; i++) 60 1.5 mrg { 61 1.5 mrg unsigned short kind1 = kinds[i] & 0xff; 62 1.5 mrg 63 1.5 mrg /* Handle Fortran deviceptr clause. */ 64 1.5 mrg if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) 65 1.5 mrg { 66 1.5 mrg unsigned short kind2; 67 1.5 mrg 68 1.5 mrg if (i < (signed)mapnum - 1) 69 1.5 mrg kind2 = kinds[i + 1] & 0xff; 70 1.5 mrg else 71 1.5 mrg kind2 = 0xffff; 72 1.5 mrg 73 1.5 mrg if (sizes[i] == sizeof (void *)) 74 1.5 mrg continue; 75 1.5 mrg 76 1.5 mrg /* At this point, we're dealing with a Fortran deviceptr. 77 1.5 mrg If the next element is not what we're expecting, then 78 1.5 mrg this is an instance of where the deviceptr variable was 79 1.5 mrg not used within the region and the pointer was removed 80 1.5 mrg by the gimplifier. */ 81 1.5 mrg if (kind2 == GOMP_MAP_POINTER 82 1.5 mrg && sizes[i + 1] == 0 83 1.5 mrg && hostaddrs[i] == *(void **)hostaddrs[i + 1]) 84 1.5 mrg { 85 1.5 mrg kinds[i+1] = kinds[i]; 86 1.5 mrg sizes[i+1] = sizeof (void *); 87 1.5 mrg } 88 1.5 mrg 89 1.5 mrg /* Invalidate the entry. */ 90 1.5 mrg hostaddrs[i] = NULL; 91 1.5 mrg } 92 1.5 mrg } 93 1.1 mrg } 94 1.1 mrg 95 1.3 mrg 96 1.5 mrg /* Launch a possibly offloaded function with FLAGS. FN is the host fn 97 1.3 mrg address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory 98 1.3 mrg blocks to be copied to/from the device. Varadic arguments are 99 1.3 mrg keyed optional parameters terminated with a zero. */ 100 1.1 mrg 101 1.1 mrg void 102 1.5 mrg GOACC_parallel_keyed (int flags_m, void (*fn) (void *), 103 1.3 mrg size_t mapnum, void **hostaddrs, size_t *sizes, 104 1.3 mrg unsigned short *kinds, ...) 105 1.1 mrg { 106 1.5 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 107 1.5 mrg 108 1.1 mrg va_list ap; 109 1.1 mrg struct goacc_thread *thr; 110 1.1 mrg struct gomp_device_descr *acc_dev; 111 1.1 mrg struct target_mem_desc *tgt; 112 1.1 mrg void **devaddrs; 113 1.1 mrg unsigned int i; 114 1.1 mrg struct splay_tree_key_s k; 115 1.1 mrg splay_tree_key tgt_fn_key; 116 1.1 mrg void (*tgt_fn); 117 1.3 mrg int async = GOMP_ASYNC_SYNC; 118 1.3 mrg unsigned dims[GOMP_DIM_MAX]; 119 1.3 mrg unsigned tag; 120 1.1 mrg 121 1.1 mrg #ifdef HAVE_INTTYPES_H 122 1.3 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 123 1.3 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 124 1.1 mrg #else 125 1.3 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", 126 1.3 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); 127 1.1 mrg #endif 128 1.1 mrg goacc_lazy_initialize (); 129 1.1 mrg 130 1.1 mrg thr = goacc_thread (); 131 1.1 mrg acc_dev = thr->dev; 132 1.1 mrg 133 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 134 1.6 mrg 135 1.6 mrg acc_prof_info prof_info; 136 1.6 mrg if (profiling_p) 137 1.6 mrg { 138 1.6 mrg thr->prof_info = &prof_info; 139 1.6 mrg 140 1.6 mrg prof_info.event_type = acc_ev_compute_construct_start; 141 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 142 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 143 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type); 144 1.6 mrg prof_info.device_number = acc_dev->target_id; 145 1.6 mrg prof_info.thread_id = -1; 146 1.6 mrg prof_info.async = async; 147 1.6 mrg prof_info.async_queue = prof_info.async; 148 1.6 mrg prof_info.src_file = NULL; 149 1.6 mrg prof_info.func_name = NULL; 150 1.6 mrg prof_info.line_no = -1; 151 1.6 mrg prof_info.end_line_no = -1; 152 1.6 mrg prof_info.func_line_no = -1; 153 1.6 mrg prof_info.func_end_line_no = -1; 154 1.6 mrg } 155 1.6 mrg acc_event_info compute_construct_event_info; 156 1.6 mrg if (profiling_p) 157 1.6 mrg { 158 1.6 mrg compute_construct_event_info.other_event.event_type 159 1.6 mrg = prof_info.event_type; 160 1.6 mrg compute_construct_event_info.other_event.valid_bytes 161 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 162 1.6 mrg compute_construct_event_info.other_event.parent_construct 163 1.6 mrg = acc_construct_parallel; 164 1.6 mrg compute_construct_event_info.other_event.implicit = 0; 165 1.6 mrg compute_construct_event_info.other_event.tool_info = NULL; 166 1.6 mrg } 167 1.6 mrg acc_api_info api_info; 168 1.6 mrg if (profiling_p) 169 1.6 mrg { 170 1.6 mrg thr->api_info = &api_info; 171 1.6 mrg 172 1.6 mrg api_info.device_api = acc_device_api_none; 173 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 174 1.6 mrg api_info.device_type = prof_info.device_type; 175 1.6 mrg api_info.vendor = -1; 176 1.6 mrg api_info.device_handle = NULL; 177 1.6 mrg api_info.context_handle = NULL; 178 1.6 mrg api_info.async_handle = NULL; 179 1.6 mrg } 180 1.6 mrg 181 1.6 mrg if (profiling_p) 182 1.6 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, 183 1.6 mrg &api_info); 184 1.6 mrg 185 1.5 mrg handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); 186 1.5 mrg 187 1.1 mrg /* Host fallback if "if" clause is false or if the current device is set to 188 1.1 mrg the host. */ 189 1.5 mrg if (flags & GOACC_FLAG_HOST_FALLBACK) 190 1.1 mrg { 191 1.6 mrg prof_info.device_type = acc_device_host; 192 1.6 mrg api_info.device_type = prof_info.device_type; 193 1.1 mrg goacc_save_and_set_bind (acc_device_host); 194 1.1 mrg fn (hostaddrs); 195 1.1 mrg goacc_restore_bind (); 196 1.6 mrg goto out_prof; 197 1.1 mrg } 198 1.1 mrg else if (acc_device_type (acc_dev->type) == acc_device_host) 199 1.1 mrg { 200 1.1 mrg fn (hostaddrs); 201 1.6 mrg goto out_prof; 202 1.1 mrg } 203 1.1 mrg 204 1.3 mrg /* Default: let the runtime choose. */ 205 1.3 mrg for (i = 0; i != GOMP_DIM_MAX; i++) 206 1.3 mrg dims[i] = 0; 207 1.3 mrg 208 1.3 mrg va_start (ap, kinds); 209 1.3 mrg /* TODO: This will need amending when device_type is implemented. */ 210 1.3 mrg while ((tag = va_arg (ap, unsigned)) != 0) 211 1.3 mrg { 212 1.3 mrg if (GOMP_LAUNCH_DEVICE (tag)) 213 1.3 mrg gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", 214 1.3 mrg GOMP_LAUNCH_DEVICE (tag)); 215 1.3 mrg 216 1.3 mrg switch (GOMP_LAUNCH_CODE (tag)) 217 1.3 mrg { 218 1.3 mrg case GOMP_LAUNCH_DIM: 219 1.3 mrg { 220 1.3 mrg unsigned mask = GOMP_LAUNCH_OP (tag); 221 1.3 mrg 222 1.3 mrg for (i = 0; i != GOMP_DIM_MAX; i++) 223 1.3 mrg if (mask & GOMP_DIM_MASK (i)) 224 1.3 mrg dims[i] = va_arg (ap, unsigned); 225 1.3 mrg } 226 1.3 mrg break; 227 1.3 mrg 228 1.3 mrg case GOMP_LAUNCH_ASYNC: 229 1.3 mrg { 230 1.3 mrg /* Small constant values are encoded in the operand. */ 231 1.3 mrg async = GOMP_LAUNCH_OP (tag); 232 1.3 mrg 233 1.3 mrg if (async == GOMP_LAUNCH_OP_MAX) 234 1.3 mrg async = va_arg (ap, unsigned); 235 1.6 mrg 236 1.6 mrg if (profiling_p) 237 1.6 mrg { 238 1.6 mrg prof_info.async = async; 239 1.6 mrg prof_info.async_queue = prof_info.async; 240 1.6 mrg } 241 1.6 mrg 242 1.3 mrg break; 243 1.3 mrg } 244 1.3 mrg 245 1.3 mrg case GOMP_LAUNCH_WAIT: 246 1.3 mrg { 247 1.3 mrg unsigned num_waits = GOMP_LAUNCH_OP (tag); 248 1.5 mrg goacc_wait (async, num_waits, &ap); 249 1.3 mrg break; 250 1.3 mrg } 251 1.1 mrg 252 1.3 mrg default: 253 1.3 mrg gomp_fatal ("unrecognized offload code '%d'," 254 1.3 mrg " libgomp is too old", GOMP_LAUNCH_CODE (tag)); 255 1.3 mrg } 256 1.3 mrg } 257 1.1 mrg va_end (ap); 258 1.3 mrg 259 1.1 mrg if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) 260 1.1 mrg { 261 1.1 mrg k.host_start = (uintptr_t) fn; 262 1.1 mrg k.host_end = k.host_start + 1; 263 1.1 mrg gomp_mutex_lock (&acc_dev->lock); 264 1.1 mrg tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); 265 1.1 mrg gomp_mutex_unlock (&acc_dev->lock); 266 1.1 mrg 267 1.1 mrg if (tgt_fn_key == NULL) 268 1.1 mrg gomp_fatal ("target function wasn't mapped"); 269 1.1 mrg 270 1.1 mrg tgt_fn = (void (*)) tgt_fn_key->tgt_offset; 271 1.1 mrg } 272 1.1 mrg else 273 1.1 mrg tgt_fn = (void (*)) fn; 274 1.1 mrg 275 1.6 mrg acc_event_info enter_exit_data_event_info; 276 1.6 mrg if (profiling_p) 277 1.6 mrg { 278 1.6 mrg prof_info.event_type = acc_ev_enter_data_start; 279 1.6 mrg enter_exit_data_event_info.other_event.event_type 280 1.6 mrg = prof_info.event_type; 281 1.6 mrg enter_exit_data_event_info.other_event.valid_bytes 282 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 283 1.6 mrg enter_exit_data_event_info.other_event.parent_construct 284 1.6 mrg = compute_construct_event_info.other_event.parent_construct; 285 1.6 mrg enter_exit_data_event_info.other_event.implicit = 1; 286 1.6 mrg enter_exit_data_event_info.other_event.tool_info = NULL; 287 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 288 1.6 mrg &api_info); 289 1.6 mrg } 290 1.6 mrg 291 1.6 mrg goacc_aq aq = get_goacc_asyncqueue (async); 292 1.6 mrg 293 1.7 mrg tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, 294 1.7 mrg true, 0); 295 1.6 mrg if (profiling_p) 296 1.6 mrg { 297 1.6 mrg prof_info.event_type = acc_ev_enter_data_end; 298 1.6 mrg enter_exit_data_event_info.other_event.event_type 299 1.6 mrg = prof_info.event_type; 300 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 301 1.6 mrg &api_info); 302 1.6 mrg } 303 1.7 mrg 304 1.1 mrg devaddrs = gomp_alloca (sizeof (void *) * mapnum); 305 1.1 mrg for (i = 0; i < mapnum; i++) 306 1.6 mrg devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); 307 1.6 mrg 308 1.6 mrg if (aq == NULL) 309 1.6 mrg acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, 310 1.6 mrg tgt); 311 1.6 mrg else 312 1.6 mrg acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, 313 1.6 mrg dims, tgt, aq); 314 1.1 mrg 315 1.6 mrg if (profiling_p) 316 1.6 mrg { 317 1.6 mrg prof_info.event_type = acc_ev_exit_data_start; 318 1.6 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type; 319 1.6 mrg enter_exit_data_event_info.other_event.tool_info = NULL; 320 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 321 1.6 mrg &api_info); 322 1.6 mrg } 323 1.1 mrg 324 1.7 mrg /* If running synchronously (aq == NULL), this will unmap immediately. */ 325 1.7 mrg goacc_unmap_vars (tgt, true, aq); 326 1.6 mrg 327 1.6 mrg if (profiling_p) 328 1.5 mrg { 329 1.6 mrg prof_info.event_type = acc_ev_exit_data_end; 330 1.6 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type; 331 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 332 1.6 mrg &api_info); 333 1.5 mrg } 334 1.1 mrg 335 1.6 mrg out_prof: 336 1.6 mrg if (profiling_p) 337 1.6 mrg { 338 1.6 mrg prof_info.event_type = acc_ev_compute_construct_end; 339 1.6 mrg compute_construct_event_info.other_event.event_type 340 1.6 mrg = prof_info.event_type; 341 1.6 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, 342 1.6 mrg &api_info); 343 1.6 mrg 344 1.6 mrg thr->prof_info = NULL; 345 1.6 mrg thr->api_info = NULL; 346 1.6 mrg } 347 1.1 mrg } 348 1.1 mrg 349 1.6 mrg /* Legacy entry point (GCC 5). Only provide host fallback execution. */ 350 1.3 mrg 351 1.3 mrg void 352 1.5 mrg GOACC_parallel (int flags_m, void (*fn) (void *), 353 1.3 mrg size_t mapnum, void **hostaddrs, size_t *sizes, 354 1.3 mrg unsigned short *kinds, 355 1.3 mrg int num_gangs, int num_workers, int vector_length, 356 1.3 mrg int async, int num_waits, ...) 357 1.3 mrg { 358 1.3 mrg goacc_save_and_set_bind (acc_device_host); 359 1.3 mrg fn (hostaddrs); 360 1.3 mrg goacc_restore_bind (); 361 1.3 mrg } 362 1.3 mrg 363 1.1 mrg void 364 1.5 mrg GOACC_data_start (int flags_m, size_t mapnum, 365 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds) 366 1.1 mrg { 367 1.5 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 368 1.5 mrg 369 1.1 mrg struct target_mem_desc *tgt; 370 1.1 mrg 371 1.1 mrg #ifdef HAVE_INTTYPES_H 372 1.1 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 373 1.1 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 374 1.1 mrg #else 375 1.1 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", 376 1.1 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); 377 1.1 mrg #endif 378 1.1 mrg 379 1.1 mrg goacc_lazy_initialize (); 380 1.1 mrg 381 1.1 mrg struct goacc_thread *thr = goacc_thread (); 382 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev; 383 1.1 mrg 384 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 385 1.6 mrg 386 1.6 mrg acc_prof_info prof_info; 387 1.6 mrg if (profiling_p) 388 1.6 mrg { 389 1.6 mrg thr->prof_info = &prof_info; 390 1.6 mrg 391 1.6 mrg prof_info.event_type = acc_ev_enter_data_start; 392 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 393 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 394 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type); 395 1.6 mrg prof_info.device_number = acc_dev->target_id; 396 1.6 mrg prof_info.thread_id = -1; 397 1.6 mrg prof_info.async = acc_async_sync; /* Always synchronous. */ 398 1.6 mrg prof_info.async_queue = prof_info.async; 399 1.6 mrg prof_info.src_file = NULL; 400 1.6 mrg prof_info.func_name = NULL; 401 1.6 mrg prof_info.line_no = -1; 402 1.6 mrg prof_info.end_line_no = -1; 403 1.6 mrg prof_info.func_line_no = -1; 404 1.6 mrg prof_info.func_end_line_no = -1; 405 1.6 mrg } 406 1.6 mrg acc_event_info enter_data_event_info; 407 1.6 mrg if (profiling_p) 408 1.6 mrg { 409 1.6 mrg enter_data_event_info.other_event.event_type 410 1.6 mrg = prof_info.event_type; 411 1.6 mrg enter_data_event_info.other_event.valid_bytes 412 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 413 1.6 mrg enter_data_event_info.other_event.parent_construct = acc_construct_data; 414 1.6 mrg for (int i = 0; i < mapnum; ++i) 415 1.6 mrg if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR 416 1.6 mrg || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 417 1.6 mrg { 418 1.6 mrg /* If there is one such data mapping kind, then this is actually an 419 1.6 mrg OpenACC 'host_data' construct. (GCC maps the OpenACC 420 1.6 mrg 'host_data' construct to the OpenACC 'data' construct.) Apart 421 1.6 mrg from artificial test cases (such as an OpenACC 'host_data' 422 1.6 mrg construct's (implicit) device initialization when there hasn't 423 1.6 mrg been any device data be set up before...), there can't really 424 1.6 mrg any meaningful events be generated from OpenACC 'host_data' 425 1.6 mrg constructs, though. */ 426 1.6 mrg enter_data_event_info.other_event.parent_construct 427 1.6 mrg = acc_construct_host_data; 428 1.6 mrg break; 429 1.6 mrg } 430 1.6 mrg enter_data_event_info.other_event.implicit = 0; 431 1.6 mrg enter_data_event_info.other_event.tool_info = NULL; 432 1.6 mrg } 433 1.6 mrg acc_api_info api_info; 434 1.6 mrg if (profiling_p) 435 1.6 mrg { 436 1.6 mrg thr->api_info = &api_info; 437 1.6 mrg 438 1.6 mrg api_info.device_api = acc_device_api_none; 439 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 440 1.6 mrg api_info.device_type = prof_info.device_type; 441 1.6 mrg api_info.vendor = -1; 442 1.6 mrg api_info.device_handle = NULL; 443 1.6 mrg api_info.context_handle = NULL; 444 1.6 mrg api_info.async_handle = NULL; 445 1.6 mrg } 446 1.6 mrg 447 1.6 mrg if (profiling_p) 448 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); 449 1.6 mrg 450 1.1 mrg /* Host fallback or 'do nothing'. */ 451 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 452 1.5 mrg || (flags & GOACC_FLAG_HOST_FALLBACK)) 453 1.1 mrg { 454 1.6 mrg prof_info.device_type = acc_device_host; 455 1.6 mrg api_info.device_type = prof_info.device_type; 456 1.7 mrg tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0); 457 1.1 mrg tgt->prev = thr->mapped_data; 458 1.1 mrg thr->mapped_data = tgt; 459 1.1 mrg 460 1.6 mrg goto out_prof; 461 1.1 mrg } 462 1.1 mrg 463 1.1 mrg gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 464 1.7 mrg tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds, 465 1.7 mrg true, 0); 466 1.1 mrg gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 467 1.1 mrg tgt->prev = thr->mapped_data; 468 1.1 mrg thr->mapped_data = tgt; 469 1.6 mrg 470 1.6 mrg out_prof: 471 1.6 mrg if (profiling_p) 472 1.6 mrg { 473 1.6 mrg prof_info.event_type = acc_ev_enter_data_end; 474 1.6 mrg enter_data_event_info.other_event.event_type = prof_info.event_type; 475 1.6 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); 476 1.6 mrg 477 1.6 mrg thr->prof_info = NULL; 478 1.6 mrg thr->api_info = NULL; 479 1.6 mrg } 480 1.1 mrg } 481 1.1 mrg 482 1.1 mrg void 483 1.1 mrg GOACC_data_end (void) 484 1.1 mrg { 485 1.1 mrg struct goacc_thread *thr = goacc_thread (); 486 1.6 mrg struct gomp_device_descr *acc_dev = thr->dev; 487 1.1 mrg struct target_mem_desc *tgt = thr->mapped_data; 488 1.1 mrg 489 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 490 1.1 mrg 491 1.6 mrg acc_prof_info prof_info; 492 1.6 mrg if (profiling_p) 493 1.1 mrg { 494 1.6 mrg thr->prof_info = &prof_info; 495 1.1 mrg 496 1.6 mrg prof_info.event_type = acc_ev_exit_data_start; 497 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 498 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 499 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type); 500 1.6 mrg prof_info.device_number = acc_dev->target_id; 501 1.6 mrg prof_info.thread_id = -1; 502 1.6 mrg prof_info.async = acc_async_sync; /* Always synchronous. */ 503 1.6 mrg prof_info.async_queue = prof_info.async; 504 1.6 mrg prof_info.src_file = NULL; 505 1.6 mrg prof_info.func_name = NULL; 506 1.6 mrg prof_info.line_no = -1; 507 1.6 mrg prof_info.end_line_no = -1; 508 1.6 mrg prof_info.func_line_no = -1; 509 1.6 mrg prof_info.func_end_line_no = -1; 510 1.6 mrg } 511 1.6 mrg acc_event_info exit_data_event_info; 512 1.6 mrg if (profiling_p) 513 1.6 mrg { 514 1.6 mrg exit_data_event_info.other_event.event_type 515 1.6 mrg = prof_info.event_type; 516 1.6 mrg exit_data_event_info.other_event.valid_bytes 517 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 518 1.6 mrg exit_data_event_info.other_event.parent_construct = acc_construct_data; 519 1.6 mrg exit_data_event_info.other_event.implicit = 0; 520 1.6 mrg exit_data_event_info.other_event.tool_info = NULL; 521 1.6 mrg } 522 1.6 mrg acc_api_info api_info; 523 1.6 mrg if (profiling_p) 524 1.6 mrg { 525 1.6 mrg thr->api_info = &api_info; 526 1.6 mrg 527 1.6 mrg api_info.device_api = acc_device_api_none; 528 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 529 1.6 mrg api_info.device_type = prof_info.device_type; 530 1.6 mrg api_info.vendor = -1; 531 1.6 mrg api_info.device_handle = NULL; 532 1.6 mrg api_info.context_handle = NULL; 533 1.6 mrg api_info.async_handle = NULL; 534 1.1 mrg } 535 1.1 mrg 536 1.6 mrg if (profiling_p) 537 1.6 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); 538 1.5 mrg 539 1.6 mrg gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 540 1.6 mrg thr->mapped_data = tgt->prev; 541 1.7 mrg goacc_unmap_vars (tgt, true, NULL); 542 1.6 mrg gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 543 1.1 mrg 544 1.6 mrg if (profiling_p) 545 1.1 mrg { 546 1.6 mrg prof_info.event_type = acc_ev_exit_data_end; 547 1.6 mrg exit_data_event_info.other_event.event_type = prof_info.event_type; 548 1.6 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); 549 1.1 mrg 550 1.6 mrg thr->prof_info = NULL; 551 1.6 mrg thr->api_info = NULL; 552 1.1 mrg } 553 1.1 mrg } 554 1.1 mrg 555 1.1 mrg void 556 1.5 mrg GOACC_update (int flags_m, size_t mapnum, 557 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds, 558 1.1 mrg int async, int num_waits, ...) 559 1.1 mrg { 560 1.5 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 561 1.5 mrg 562 1.1 mrg size_t i; 563 1.1 mrg 564 1.1 mrg goacc_lazy_initialize (); 565 1.1 mrg 566 1.1 mrg struct goacc_thread *thr = goacc_thread (); 567 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev; 568 1.1 mrg 569 1.6 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 570 1.6 mrg 571 1.6 mrg acc_prof_info prof_info; 572 1.6 mrg if (profiling_p) 573 1.6 mrg { 574 1.6 mrg thr->prof_info = &prof_info; 575 1.6 mrg 576 1.6 mrg prof_info.event_type = acc_ev_update_start; 577 1.6 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 578 1.6 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 579 1.6 mrg prof_info.device_type = acc_device_type (acc_dev->type); 580 1.6 mrg prof_info.device_number = acc_dev->target_id; 581 1.6 mrg prof_info.thread_id = -1; 582 1.6 mrg prof_info.async = async; 583 1.6 mrg prof_info.async_queue = prof_info.async; 584 1.6 mrg prof_info.src_file = NULL; 585 1.6 mrg prof_info.func_name = NULL; 586 1.6 mrg prof_info.line_no = -1; 587 1.6 mrg prof_info.end_line_no = -1; 588 1.6 mrg prof_info.func_line_no = -1; 589 1.6 mrg prof_info.func_end_line_no = -1; 590 1.6 mrg } 591 1.6 mrg acc_event_info update_event_info; 592 1.6 mrg if (profiling_p) 593 1.6 mrg { 594 1.6 mrg update_event_info.other_event.event_type 595 1.6 mrg = prof_info.event_type; 596 1.6 mrg update_event_info.other_event.valid_bytes 597 1.6 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 598 1.6 mrg update_event_info.other_event.parent_construct = acc_construct_update; 599 1.6 mrg update_event_info.other_event.implicit = 0; 600 1.6 mrg update_event_info.other_event.tool_info = NULL; 601 1.6 mrg } 602 1.6 mrg acc_api_info api_info; 603 1.6 mrg if (profiling_p) 604 1.6 mrg { 605 1.6 mrg thr->api_info = &api_info; 606 1.6 mrg 607 1.6 mrg api_info.device_api = acc_device_api_none; 608 1.6 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 609 1.6 mrg api_info.device_type = prof_info.device_type; 610 1.6 mrg api_info.vendor = -1; 611 1.6 mrg api_info.device_handle = NULL; 612 1.6 mrg api_info.context_handle = NULL; 613 1.6 mrg api_info.async_handle = NULL; 614 1.6 mrg } 615 1.6 mrg 616 1.6 mrg if (profiling_p) 617 1.6 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); 618 1.6 mrg 619 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 620 1.5 mrg || (flags & GOACC_FLAG_HOST_FALLBACK)) 621 1.6 mrg { 622 1.6 mrg prof_info.device_type = acc_device_host; 623 1.6 mrg api_info.device_type = prof_info.device_type; 624 1.6 mrg 625 1.6 mrg goto out_prof; 626 1.6 mrg } 627 1.1 mrg 628 1.3 mrg if (num_waits) 629 1.1 mrg { 630 1.1 mrg va_list ap; 631 1.1 mrg 632 1.1 mrg va_start (ap, num_waits); 633 1.3 mrg goacc_wait (async, num_waits, &ap); 634 1.1 mrg va_end (ap); 635 1.1 mrg } 636 1.1 mrg 637 1.5 mrg bool update_device = false; 638 1.1 mrg for (i = 0; i < mapnum; ++i) 639 1.1 mrg { 640 1.1 mrg unsigned char kind = kinds[i] & 0xff; 641 1.1 mrg 642 1.1 mrg switch (kind) 643 1.1 mrg { 644 1.1 mrg case GOMP_MAP_POINTER: 645 1.1 mrg case GOMP_MAP_TO_PSET: 646 1.1 mrg break; 647 1.1 mrg 648 1.5 mrg case GOMP_MAP_ALWAYS_POINTER: 649 1.5 mrg if (update_device) 650 1.5 mrg { 651 1.5 mrg /* Save the contents of the host pointer. */ 652 1.5 mrg void *dptr = acc_deviceptr (hostaddrs[i-1]); 653 1.5 mrg uintptr_t t = *(uintptr_t *) hostaddrs[i]; 654 1.5 mrg 655 1.5 mrg /* Update the contents of the host pointer to reflect 656 1.5 mrg the value of the allocated device memory in the 657 1.5 mrg previous pointer. */ 658 1.5 mrg *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; 659 1.6 mrg /* TODO: verify that we really cannot use acc_update_device_async 660 1.6 mrg here. */ 661 1.5 mrg acc_update_device (hostaddrs[i], sizeof (uintptr_t)); 662 1.5 mrg 663 1.5 mrg /* Restore the host pointer. */ 664 1.5 mrg *(uintptr_t *) hostaddrs[i] = t; 665 1.5 mrg update_device = false; 666 1.5 mrg } 667 1.5 mrg break; 668 1.5 mrg 669 1.5 mrg case GOMP_MAP_TO: 670 1.5 mrg if (!acc_is_present (hostaddrs[i], sizes[i])) 671 1.5 mrg { 672 1.5 mrg update_device = false; 673 1.5 mrg break; 674 1.5 mrg } 675 1.5 mrg /* Fallthru */ 676 1.1 mrg case GOMP_MAP_FORCE_TO: 677 1.5 mrg update_device = true; 678 1.6 mrg acc_update_device_async (hostaddrs[i], sizes[i], async); 679 1.1 mrg break; 680 1.1 mrg 681 1.5 mrg case GOMP_MAP_FROM: 682 1.5 mrg if (!acc_is_present (hostaddrs[i], sizes[i])) 683 1.5 mrg { 684 1.5 mrg update_device = false; 685 1.5 mrg break; 686 1.5 mrg } 687 1.5 mrg /* Fallthru */ 688 1.1 mrg case GOMP_MAP_FORCE_FROM: 689 1.5 mrg update_device = false; 690 1.6 mrg acc_update_self_async (hostaddrs[i], sizes[i], async); 691 1.1 mrg break; 692 1.1 mrg 693 1.1 mrg default: 694 1.1 mrg gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); 695 1.1 mrg break; 696 1.1 mrg } 697 1.1 mrg } 698 1.1 mrg 699 1.6 mrg out_prof: 700 1.6 mrg if (profiling_p) 701 1.3 mrg { 702 1.6 mrg prof_info.event_type = acc_ev_update_end; 703 1.6 mrg update_event_info.other_event.event_type = prof_info.event_type; 704 1.6 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); 705 1.1 mrg 706 1.6 mrg thr->prof_info = NULL; 707 1.6 mrg thr->api_info = NULL; 708 1.3 mrg } 709 1.1 mrg } 710 1.1 mrg 711 1.6 mrg 712 1.6 mrg /* Legacy entry point (GCC 5). */ 713 1.6 mrg 714 1.1 mrg int 715 1.1 mrg GOACC_get_num_threads (void) 716 1.1 mrg { 717 1.1 mrg return 1; 718 1.1 mrg } 719 1.1 mrg 720 1.6 mrg /* Legacy entry point (GCC 5). */ 721 1.6 mrg 722 1.1 mrg int 723 1.1 mrg GOACC_get_thread_num (void) 724 1.1 mrg { 725 1.1 mrg return 0; 726 1.1 mrg } 727