1 1.1.1.10 mrg /* Copyright (C) 2013-2024 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg Contributed by Mentor Embedded. 4 1.1 mrg 5 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library 6 1.1 mrg (libgomp). 7 1.1 mrg 8 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it 9 1.1 mrg under the terms of the GNU General Public License as published by 10 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 11 1.1 mrg any later version. 12 1.1 mrg 13 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 14 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 16 1.1 mrg more details. 17 1.1 mrg 18 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 19 1.1 mrg permissions described in the GCC Runtime Library Exception, version 20 1.1 mrg 3.1, as published by the Free Software Foundation. 21 1.1 mrg 22 1.1 mrg You should have received a copy of the GNU General Public License and 23 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 24 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 25 1.1 mrg <http://www.gnu.org/licenses/>. */ 26 1.1 mrg 27 1.1 mrg /* This file handles OpenACC constructs. */ 28 1.1 mrg 29 1.1 mrg #include "openacc.h" 30 1.1 mrg #include "libgomp.h" 31 1.1 mrg #include "gomp-constants.h" 32 1.1 mrg #include "oacc-int.h" 33 1.1 mrg #ifdef HAVE_INTTYPES_H 34 1.1 mrg # include <inttypes.h> /* For PRIu64. */ 35 1.1 mrg #endif 36 1.1 mrg #include <string.h> 37 1.1 mrg #include <stdarg.h> 38 1.1 mrg #include <assert.h> 39 1.1 mrg 40 1.1.1.7 mrg 41 1.1.1.7 mrg /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we 42 1.1.1.7 mrg continue to support the following two legacy values. */ 43 1.1.1.7 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0, 44 1.1.1.7 mrg "legacy GOMP_DEVICE_ICV broken"); 45 1.1.1.7 mrg _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) 46 1.1.1.7 mrg == GOACC_FLAG_HOST_FALLBACK, 47 1.1.1.7 mrg "legacy GOMP_DEVICE_HOST_FALLBACK broken"); 48 1.1.1.7 mrg 49 1.1.1.7 mrg 50 1.1.1.7 mrg /* Handle the mapping pair that are presented when a 51 1.1.1.7 mrg deviceptr clause is used with Fortran. */ 52 1.1.1.7 mrg 53 1.1.1.7 mrg static void 54 1.1.1.7 mrg handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, 55 1.1.1.7 mrg unsigned short *kinds) 56 1.1.1.7 mrg { 57 1.1.1.7 mrg int i; 58 1.1.1.7 mrg 59 1.1.1.7 mrg for (i = 0; i < mapnum; i++) 60 1.1.1.7 mrg { 61 1.1.1.7 mrg unsigned short kind1 = kinds[i] & 0xff; 62 1.1.1.7 mrg 63 1.1.1.7 mrg /* Handle Fortran deviceptr clause. */ 64 1.1.1.7 mrg if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) 65 1.1.1.7 mrg { 66 1.1.1.7 mrg unsigned short kind2; 67 1.1.1.7 mrg 68 1.1.1.7 mrg if (i < (signed)mapnum - 1) 69 1.1.1.7 mrg kind2 = kinds[i + 1] & 0xff; 70 1.1.1.7 mrg else 71 1.1.1.7 mrg kind2 = 0xffff; 72 1.1.1.7 mrg 73 1.1.1.7 mrg if (sizes[i] == sizeof (void *)) 74 1.1.1.7 mrg continue; 75 1.1.1.7 mrg 76 1.1.1.7 mrg /* At this point, we're dealing with a Fortran deviceptr. 77 1.1.1.7 mrg If the next element is not what we're expecting, then 78 1.1.1.7 mrg this is an instance of where the deviceptr variable was 79 1.1.1.7 mrg not used within the region and the pointer was removed 80 1.1.1.7 mrg by the gimplifier. */ 81 1.1.1.7 mrg if (kind2 == GOMP_MAP_POINTER 82 1.1.1.7 mrg && sizes[i + 1] == 0 83 1.1.1.7 mrg && hostaddrs[i] == *(void **)hostaddrs[i + 1]) 84 1.1.1.7 mrg { 85 1.1.1.7 mrg kinds[i+1] = kinds[i]; 86 1.1.1.7 mrg sizes[i+1] = sizeof (void *); 87 1.1.1.7 mrg } 88 1.1.1.7 mrg 89 1.1.1.7 mrg /* Invalidate the entry. */ 90 1.1.1.7 mrg hostaddrs[i] = NULL; 91 1.1.1.7 mrg } 92 1.1.1.7 mrg } 93 1.1 mrg } 94 1.1 mrg 95 1.1.1.2 mrg 96 1.1.1.7 mrg /* Launch a possibly offloaded function with FLAGS. FN is the host fn 97 1.1.1.2 mrg address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory 98 1.1.1.2 mrg blocks to be copied to/from the device. Varadic arguments are 99 1.1.1.2 mrg keyed optional parameters terminated with a zero. */ 100 1.1 mrg 101 1.1 mrg void 102 1.1.1.7 mrg GOACC_parallel_keyed (int flags_m, void (*fn) (void *), 103 1.1.1.2 mrg size_t mapnum, void **hostaddrs, size_t *sizes, 104 1.1.1.2 mrg unsigned short *kinds, ...) 105 1.1 mrg { 106 1.1.1.7 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 107 1.1.1.7 mrg 108 1.1 mrg va_list ap; 109 1.1 mrg struct goacc_thread *thr; 110 1.1 mrg struct gomp_device_descr *acc_dev; 111 1.1 mrg unsigned int i; 112 1.1 mrg struct splay_tree_key_s k; 113 1.1 mrg splay_tree_key tgt_fn_key; 114 1.1 mrg void (*tgt_fn); 115 1.1.1.2 mrg int async = GOMP_ASYNC_SYNC; 116 1.1.1.2 mrg unsigned dims[GOMP_DIM_MAX]; 117 1.1.1.2 mrg unsigned tag; 118 1.1 mrg 119 1.1 mrg #ifdef HAVE_INTTYPES_H 120 1.1.1.2 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 121 1.1.1.2 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 122 1.1 mrg #else 123 1.1.1.2 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", 124 1.1.1.2 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); 125 1.1 mrg #endif 126 1.1 mrg goacc_lazy_initialize (); 127 1.1 mrg 128 1.1 mrg thr = goacc_thread (); 129 1.1 mrg acc_dev = thr->dev; 130 1.1 mrg 131 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 132 1.1.1.8 mrg 133 1.1.1.8 mrg acc_prof_info prof_info; 134 1.1.1.8 mrg if (profiling_p) 135 1.1.1.8 mrg { 136 1.1.1.8 mrg thr->prof_info = &prof_info; 137 1.1.1.8 mrg 138 1.1.1.8 mrg prof_info.event_type = acc_ev_compute_construct_start; 139 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 140 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 141 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type); 142 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id; 143 1.1.1.8 mrg prof_info.thread_id = -1; 144 1.1.1.8 mrg prof_info.async = async; 145 1.1.1.8 mrg prof_info.async_queue = prof_info.async; 146 1.1.1.8 mrg prof_info.src_file = NULL; 147 1.1.1.8 mrg prof_info.func_name = NULL; 148 1.1.1.8 mrg prof_info.line_no = -1; 149 1.1.1.8 mrg prof_info.end_line_no = -1; 150 1.1.1.8 mrg prof_info.func_line_no = -1; 151 1.1.1.8 mrg prof_info.func_end_line_no = -1; 152 1.1.1.8 mrg } 153 1.1.1.8 mrg acc_event_info compute_construct_event_info; 154 1.1.1.8 mrg if (profiling_p) 155 1.1.1.8 mrg { 156 1.1.1.8 mrg compute_construct_event_info.other_event.event_type 157 1.1.1.8 mrg = prof_info.event_type; 158 1.1.1.8 mrg compute_construct_event_info.other_event.valid_bytes 159 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 160 1.1.1.8 mrg compute_construct_event_info.other_event.parent_construct 161 1.1.1.8 mrg = acc_construct_parallel; 162 1.1.1.8 mrg compute_construct_event_info.other_event.implicit = 0; 163 1.1.1.8 mrg compute_construct_event_info.other_event.tool_info = NULL; 164 1.1.1.8 mrg } 165 1.1.1.8 mrg acc_api_info api_info; 166 1.1.1.8 mrg if (profiling_p) 167 1.1.1.8 mrg { 168 1.1.1.8 mrg thr->api_info = &api_info; 169 1.1.1.8 mrg 170 1.1.1.8 mrg api_info.device_api = acc_device_api_none; 171 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 172 1.1.1.8 mrg api_info.device_type = prof_info.device_type; 173 1.1.1.8 mrg api_info.vendor = -1; 174 1.1.1.8 mrg api_info.device_handle = NULL; 175 1.1.1.8 mrg api_info.context_handle = NULL; 176 1.1.1.8 mrg api_info.async_handle = NULL; 177 1.1.1.8 mrg } 178 1.1.1.8 mrg 179 1.1.1.8 mrg if (profiling_p) 180 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, 181 1.1.1.8 mrg &api_info); 182 1.1.1.8 mrg 183 1.1.1.7 mrg handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); 184 1.1.1.7 mrg 185 1.1 mrg /* Host fallback if "if" clause is false or if the current device is set to 186 1.1 mrg the host. */ 187 1.1.1.10 mrg if ((flags & GOACC_FLAG_HOST_FALLBACK) 188 1.1.1.10 mrg /* TODO: a proper pthreads based "multi-core CPU" local device 189 1.1.1.10 mrg implementation. Currently, this is still the same as host-fallback. */ 190 1.1.1.10 mrg || (flags & GOACC_FLAG_LOCAL_DEVICE)) 191 1.1 mrg { 192 1.1.1.8 mrg prof_info.device_type = acc_device_host; 193 1.1.1.8 mrg api_info.device_type = prof_info.device_type; 194 1.1 mrg goacc_save_and_set_bind (acc_device_host); 195 1.1 mrg fn (hostaddrs); 196 1.1 mrg goacc_restore_bind (); 197 1.1.1.8 mrg goto out_prof; 198 1.1 mrg } 199 1.1 mrg else if (acc_device_type (acc_dev->type) == acc_device_host) 200 1.1 mrg { 201 1.1 mrg fn (hostaddrs); 202 1.1.1.8 mrg goto out_prof; 203 1.1 mrg } 204 1.1 mrg 205 1.1.1.2 mrg /* Default: let the runtime choose. */ 206 1.1.1.2 mrg for (i = 0; i != GOMP_DIM_MAX; i++) 207 1.1.1.2 mrg dims[i] = 0; 208 1.1.1.2 mrg 209 1.1.1.2 mrg va_start (ap, kinds); 210 1.1.1.2 mrg /* TODO: This will need amending when device_type is implemented. */ 211 1.1.1.2 mrg while ((tag = va_arg (ap, unsigned)) != 0) 212 1.1.1.2 mrg { 213 1.1.1.2 mrg if (GOMP_LAUNCH_DEVICE (tag)) 214 1.1.1.2 mrg gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", 215 1.1.1.2 mrg GOMP_LAUNCH_DEVICE (tag)); 216 1.1 mrg 217 1.1.1.2 mrg switch (GOMP_LAUNCH_CODE (tag)) 218 1.1.1.2 mrg { 219 1.1.1.2 mrg case GOMP_LAUNCH_DIM: 220 1.1.1.2 mrg { 221 1.1.1.2 mrg unsigned mask = GOMP_LAUNCH_OP (tag); 222 1.1.1.2 mrg 223 1.1.1.2 mrg for (i = 0; i != GOMP_DIM_MAX; i++) 224 1.1.1.2 mrg if (mask & GOMP_DIM_MASK (i)) 225 1.1.1.2 mrg dims[i] = va_arg (ap, unsigned); 226 1.1.1.2 mrg } 227 1.1.1.2 mrg break; 228 1.1.1.2 mrg 229 1.1.1.2 mrg case GOMP_LAUNCH_ASYNC: 230 1.1.1.2 mrg { 231 1.1.1.2 mrg /* Small constant values are encoded in the operand. */ 232 1.1.1.2 mrg async = GOMP_LAUNCH_OP (tag); 233 1.1.1.2 mrg 234 1.1.1.2 mrg if (async == GOMP_LAUNCH_OP_MAX) 235 1.1.1.2 mrg async = va_arg (ap, unsigned); 236 1.1.1.8 mrg 237 1.1.1.8 mrg if (profiling_p) 238 1.1.1.8 mrg { 239 1.1.1.8 mrg prof_info.async = async; 240 1.1.1.8 mrg prof_info.async_queue = prof_info.async; 241 1.1.1.8 mrg } 242 1.1.1.8 mrg 243 1.1.1.2 mrg break; 244 1.1.1.2 mrg } 245 1.1 mrg 246 1.1.1.2 mrg case GOMP_LAUNCH_WAIT: 247 1.1.1.2 mrg { 248 1.1.1.2 mrg unsigned num_waits = GOMP_LAUNCH_OP (tag); 249 1.1.1.7 mrg goacc_wait (async, num_waits, &ap); 250 1.1.1.2 mrg break; 251 1.1.1.2 mrg } 252 1.1.1.2 mrg 253 1.1.1.2 mrg default: 254 1.1.1.2 mrg gomp_fatal ("unrecognized offload code '%d'," 255 1.1.1.2 mrg " libgomp is too old", GOMP_LAUNCH_CODE (tag)); 256 1.1.1.2 mrg } 257 1.1.1.2 mrg } 258 1.1.1.2 mrg va_end (ap); 259 1.1.1.2 mrg 260 1.1 mrg if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) 261 1.1 mrg { 262 1.1 mrg k.host_start = (uintptr_t) fn; 263 1.1 mrg k.host_end = k.host_start + 1; 264 1.1 mrg gomp_mutex_lock (&acc_dev->lock); 265 1.1 mrg tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); 266 1.1 mrg gomp_mutex_unlock (&acc_dev->lock); 267 1.1 mrg 268 1.1 mrg if (tgt_fn_key == NULL) 269 1.1 mrg gomp_fatal ("target function wasn't mapped"); 270 1.1 mrg 271 1.1 mrg tgt_fn = (void (*)) tgt_fn_key->tgt_offset; 272 1.1 mrg } 273 1.1 mrg else 274 1.1 mrg tgt_fn = (void (*)) fn; 275 1.1 mrg 276 1.1.1.8 mrg acc_event_info enter_exit_data_event_info; 277 1.1.1.8 mrg if (profiling_p) 278 1.1.1.8 mrg { 279 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_start; 280 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type 281 1.1.1.8 mrg = prof_info.event_type; 282 1.1.1.8 mrg enter_exit_data_event_info.other_event.valid_bytes 283 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 284 1.1.1.8 mrg enter_exit_data_event_info.other_event.parent_construct 285 1.1.1.8 mrg = compute_construct_event_info.other_event.parent_construct; 286 1.1.1.8 mrg enter_exit_data_event_info.other_event.implicit = 1; 287 1.1.1.8 mrg enter_exit_data_event_info.other_event.tool_info = NULL; 288 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 289 1.1.1.8 mrg &api_info); 290 1.1.1.8 mrg } 291 1.1.1.8 mrg 292 1.1.1.8 mrg goacc_aq aq = get_goacc_asyncqueue (async); 293 1.1.1.8 mrg 294 1.1.1.10 mrg struct target_mem_desc *tgt 295 1.1.1.10 mrg = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true, 296 1.1.1.10 mrg GOMP_MAP_VARS_TARGET); 297 1.1.1.10 mrg 298 1.1.1.8 mrg if (profiling_p) 299 1.1.1.8 mrg { 300 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_end; 301 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type 302 1.1.1.8 mrg = prof_info.event_type; 303 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 304 1.1.1.8 mrg &api_info); 305 1.1.1.8 mrg } 306 1.1.1.9 mrg 307 1.1.1.10 mrg void **devaddrs = (void **) tgt->tgt_start; 308 1.1.1.8 mrg if (aq == NULL) 309 1.1.1.8 mrg acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, 310 1.1.1.8 mrg tgt); 311 1.1.1.8 mrg else 312 1.1.1.8 mrg acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, 313 1.1.1.8 mrg dims, tgt, aq); 314 1.1 mrg 315 1.1.1.8 mrg if (profiling_p) 316 1.1.1.8 mrg { 317 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_start; 318 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type; 319 1.1.1.8 mrg enter_exit_data_event_info.other_event.tool_info = NULL; 320 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 321 1.1.1.8 mrg &api_info); 322 1.1.1.8 mrg } 323 1.1 mrg 324 1.1.1.9 mrg /* If running synchronously (aq == NULL), this will unmap immediately. */ 325 1.1.1.9 mrg goacc_unmap_vars (tgt, true, aq); 326 1.1.1.8 mrg 327 1.1.1.8 mrg if (profiling_p) 328 1.1.1.7 mrg { 329 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_end; 330 1.1.1.8 mrg enter_exit_data_event_info.other_event.event_type = prof_info.event_type; 331 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, 332 1.1.1.8 mrg &api_info); 333 1.1.1.7 mrg } 334 1.1 mrg 335 1.1.1.8 mrg out_prof: 336 1.1.1.8 mrg if (profiling_p) 337 1.1.1.8 mrg { 338 1.1.1.8 mrg prof_info.event_type = acc_ev_compute_construct_end; 339 1.1.1.8 mrg compute_construct_event_info.other_event.event_type 340 1.1.1.8 mrg = prof_info.event_type; 341 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, 342 1.1.1.8 mrg &api_info); 343 1.1.1.8 mrg 344 1.1.1.8 mrg thr->prof_info = NULL; 345 1.1.1.8 mrg thr->api_info = NULL; 346 1.1.1.8 mrg } 347 1.1 mrg } 348 1.1 mrg 349 1.1.1.8 mrg /* Legacy entry point (GCC 5). Only provide host fallback execution. */ 350 1.1.1.2 mrg 351 1.1.1.2 mrg void 352 1.1.1.7 mrg GOACC_parallel (int flags_m, void (*fn) (void *), 353 1.1.1.2 mrg size_t mapnum, void **hostaddrs, size_t *sizes, 354 1.1.1.2 mrg unsigned short *kinds, 355 1.1.1.2 mrg int num_gangs, int num_workers, int vector_length, 356 1.1.1.2 mrg int async, int num_waits, ...) 357 1.1.1.2 mrg { 358 1.1.1.2 mrg goacc_save_and_set_bind (acc_device_host); 359 1.1.1.2 mrg fn (hostaddrs); 360 1.1.1.2 mrg goacc_restore_bind (); 361 1.1.1.2 mrg } 362 1.1.1.2 mrg 363 1.1 mrg void 364 1.1.1.7 mrg GOACC_data_start (int flags_m, size_t mapnum, 365 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds) 366 1.1 mrg { 367 1.1.1.7 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 368 1.1.1.7 mrg 369 1.1 mrg struct target_mem_desc *tgt; 370 1.1 mrg 371 1.1 mrg #ifdef HAVE_INTTYPES_H 372 1.1 mrg gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 373 1.1 mrg __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 374 1.1 mrg #else 375 1.1 mrg gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", 376 1.1 mrg __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); 377 1.1 mrg #endif 378 1.1 mrg 379 1.1 mrg goacc_lazy_initialize (); 380 1.1 mrg 381 1.1 mrg struct goacc_thread *thr = goacc_thread (); 382 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev; 383 1.1 mrg 384 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 385 1.1.1.8 mrg 386 1.1.1.8 mrg acc_prof_info prof_info; 387 1.1.1.8 mrg if (profiling_p) 388 1.1.1.8 mrg { 389 1.1.1.8 mrg thr->prof_info = &prof_info; 390 1.1.1.8 mrg 391 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_start; 392 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 393 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 394 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type); 395 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id; 396 1.1.1.8 mrg prof_info.thread_id = -1; 397 1.1.1.8 mrg prof_info.async = acc_async_sync; /* Always synchronous. */ 398 1.1.1.8 mrg prof_info.async_queue = prof_info.async; 399 1.1.1.8 mrg prof_info.src_file = NULL; 400 1.1.1.8 mrg prof_info.func_name = NULL; 401 1.1.1.8 mrg prof_info.line_no = -1; 402 1.1.1.8 mrg prof_info.end_line_no = -1; 403 1.1.1.8 mrg prof_info.func_line_no = -1; 404 1.1.1.8 mrg prof_info.func_end_line_no = -1; 405 1.1.1.8 mrg } 406 1.1.1.8 mrg acc_event_info enter_data_event_info; 407 1.1.1.8 mrg if (profiling_p) 408 1.1.1.8 mrg { 409 1.1.1.8 mrg enter_data_event_info.other_event.event_type 410 1.1.1.8 mrg = prof_info.event_type; 411 1.1.1.8 mrg enter_data_event_info.other_event.valid_bytes 412 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 413 1.1.1.8 mrg enter_data_event_info.other_event.parent_construct = acc_construct_data; 414 1.1.1.8 mrg for (int i = 0; i < mapnum; ++i) 415 1.1.1.8 mrg if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR 416 1.1.1.8 mrg || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 417 1.1.1.8 mrg { 418 1.1.1.8 mrg /* If there is one such data mapping kind, then this is actually an 419 1.1.1.8 mrg OpenACC 'host_data' construct. (GCC maps the OpenACC 420 1.1.1.8 mrg 'host_data' construct to the OpenACC 'data' construct.) Apart 421 1.1.1.8 mrg from artificial test cases (such as an OpenACC 'host_data' 422 1.1.1.8 mrg construct's (implicit) device initialization when there hasn't 423 1.1.1.8 mrg been any device data be set up before...), there can't really 424 1.1.1.8 mrg any meaningful events be generated from OpenACC 'host_data' 425 1.1.1.8 mrg constructs, though. */ 426 1.1.1.8 mrg enter_data_event_info.other_event.parent_construct 427 1.1.1.8 mrg = acc_construct_host_data; 428 1.1.1.8 mrg break; 429 1.1.1.8 mrg } 430 1.1.1.8 mrg enter_data_event_info.other_event.implicit = 0; 431 1.1.1.8 mrg enter_data_event_info.other_event.tool_info = NULL; 432 1.1.1.8 mrg } 433 1.1.1.8 mrg acc_api_info api_info; 434 1.1.1.8 mrg if (profiling_p) 435 1.1.1.8 mrg { 436 1.1.1.8 mrg thr->api_info = &api_info; 437 1.1.1.8 mrg 438 1.1.1.8 mrg api_info.device_api = acc_device_api_none; 439 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 440 1.1.1.8 mrg api_info.device_type = prof_info.device_type; 441 1.1.1.8 mrg api_info.vendor = -1; 442 1.1.1.8 mrg api_info.device_handle = NULL; 443 1.1.1.8 mrg api_info.context_handle = NULL; 444 1.1.1.8 mrg api_info.async_handle = NULL; 445 1.1.1.8 mrg } 446 1.1.1.8 mrg 447 1.1.1.8 mrg if (profiling_p) 448 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); 449 1.1.1.8 mrg 450 1.1 mrg /* Host fallback or 'do nothing'. */ 451 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 452 1.1.1.10 mrg || (flags & GOACC_FLAG_HOST_FALLBACK) 453 1.1.1.10 mrg || (flags & GOACC_FLAG_LOCAL_DEVICE)) 454 1.1 mrg { 455 1.1.1.8 mrg prof_info.device_type = acc_device_host; 456 1.1.1.8 mrg api_info.device_type = prof_info.device_type; 457 1.1.1.9 mrg tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0); 458 1.1 mrg tgt->prev = thr->mapped_data; 459 1.1 mrg thr->mapped_data = tgt; 460 1.1 mrg 461 1.1.1.8 mrg goto out_prof; 462 1.1 mrg } 463 1.1 mrg 464 1.1 mrg gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 465 1.1.1.9 mrg tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds, 466 1.1.1.9 mrg true, 0); 467 1.1 mrg gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 468 1.1 mrg tgt->prev = thr->mapped_data; 469 1.1 mrg thr->mapped_data = tgt; 470 1.1.1.8 mrg 471 1.1.1.8 mrg out_prof: 472 1.1.1.8 mrg if (profiling_p) 473 1.1.1.8 mrg { 474 1.1.1.8 mrg prof_info.event_type = acc_ev_enter_data_end; 475 1.1.1.8 mrg enter_data_event_info.other_event.event_type = prof_info.event_type; 476 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); 477 1.1.1.8 mrg 478 1.1.1.8 mrg thr->prof_info = NULL; 479 1.1.1.8 mrg thr->api_info = NULL; 480 1.1.1.8 mrg } 481 1.1 mrg } 482 1.1 mrg 483 1.1 mrg void 484 1.1 mrg GOACC_data_end (void) 485 1.1 mrg { 486 1.1 mrg struct goacc_thread *thr = goacc_thread (); 487 1.1.1.8 mrg struct gomp_device_descr *acc_dev = thr->dev; 488 1.1 mrg struct target_mem_desc *tgt = thr->mapped_data; 489 1.1 mrg 490 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 491 1.1 mrg 492 1.1.1.8 mrg acc_prof_info prof_info; 493 1.1.1.8 mrg if (profiling_p) 494 1.1 mrg { 495 1.1.1.8 mrg thr->prof_info = &prof_info; 496 1.1 mrg 497 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_start; 498 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 499 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 500 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type); 501 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id; 502 1.1.1.8 mrg prof_info.thread_id = -1; 503 1.1.1.8 mrg prof_info.async = acc_async_sync; /* Always synchronous. */ 504 1.1.1.8 mrg prof_info.async_queue = prof_info.async; 505 1.1.1.8 mrg prof_info.src_file = NULL; 506 1.1.1.8 mrg prof_info.func_name = NULL; 507 1.1.1.8 mrg prof_info.line_no = -1; 508 1.1.1.8 mrg prof_info.end_line_no = -1; 509 1.1.1.8 mrg prof_info.func_line_no = -1; 510 1.1.1.8 mrg prof_info.func_end_line_no = -1; 511 1.1.1.8 mrg } 512 1.1.1.8 mrg acc_event_info exit_data_event_info; 513 1.1.1.8 mrg if (profiling_p) 514 1.1.1.8 mrg { 515 1.1.1.8 mrg exit_data_event_info.other_event.event_type 516 1.1.1.8 mrg = prof_info.event_type; 517 1.1.1.8 mrg exit_data_event_info.other_event.valid_bytes 518 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 519 1.1.1.8 mrg exit_data_event_info.other_event.parent_construct = acc_construct_data; 520 1.1.1.8 mrg exit_data_event_info.other_event.implicit = 0; 521 1.1.1.8 mrg exit_data_event_info.other_event.tool_info = NULL; 522 1.1.1.8 mrg } 523 1.1.1.8 mrg acc_api_info api_info; 524 1.1.1.8 mrg if (profiling_p) 525 1.1.1.8 mrg { 526 1.1.1.8 mrg thr->api_info = &api_info; 527 1.1.1.8 mrg 528 1.1.1.8 mrg api_info.device_api = acc_device_api_none; 529 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 530 1.1.1.8 mrg api_info.device_type = prof_info.device_type; 531 1.1.1.8 mrg api_info.vendor = -1; 532 1.1.1.8 mrg api_info.device_handle = NULL; 533 1.1.1.8 mrg api_info.context_handle = NULL; 534 1.1.1.8 mrg api_info.async_handle = NULL; 535 1.1 mrg } 536 1.1 mrg 537 1.1.1.8 mrg if (profiling_p) 538 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); 539 1.1.1.7 mrg 540 1.1.1.8 mrg gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 541 1.1.1.8 mrg thr->mapped_data = tgt->prev; 542 1.1.1.9 mrg goacc_unmap_vars (tgt, true, NULL); 543 1.1.1.8 mrg gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 544 1.1 mrg 545 1.1.1.8 mrg if (profiling_p) 546 1.1 mrg { 547 1.1.1.8 mrg prof_info.event_type = acc_ev_exit_data_end; 548 1.1.1.8 mrg exit_data_event_info.other_event.event_type = prof_info.event_type; 549 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); 550 1.1 mrg 551 1.1.1.8 mrg thr->prof_info = NULL; 552 1.1.1.8 mrg thr->api_info = NULL; 553 1.1 mrg } 554 1.1 mrg } 555 1.1 mrg 556 1.1 mrg void 557 1.1.1.7 mrg GOACC_update (int flags_m, size_t mapnum, 558 1.1 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds, 559 1.1 mrg int async, int num_waits, ...) 560 1.1 mrg { 561 1.1.1.7 mrg int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 562 1.1.1.7 mrg 563 1.1 mrg size_t i; 564 1.1 mrg 565 1.1 mrg goacc_lazy_initialize (); 566 1.1 mrg 567 1.1 mrg struct goacc_thread *thr = goacc_thread (); 568 1.1 mrg struct gomp_device_descr *acc_dev = thr->dev; 569 1.1 mrg 570 1.1.1.8 mrg bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); 571 1.1.1.8 mrg 572 1.1.1.8 mrg acc_prof_info prof_info; 573 1.1.1.8 mrg if (profiling_p) 574 1.1.1.8 mrg { 575 1.1.1.8 mrg thr->prof_info = &prof_info; 576 1.1.1.8 mrg 577 1.1.1.8 mrg prof_info.event_type = acc_ev_update_start; 578 1.1.1.8 mrg prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; 579 1.1.1.8 mrg prof_info.version = _ACC_PROF_INFO_VERSION; 580 1.1.1.8 mrg prof_info.device_type = acc_device_type (acc_dev->type); 581 1.1.1.8 mrg prof_info.device_number = acc_dev->target_id; 582 1.1.1.8 mrg prof_info.thread_id = -1; 583 1.1.1.8 mrg prof_info.async = async; 584 1.1.1.8 mrg prof_info.async_queue = prof_info.async; 585 1.1.1.8 mrg prof_info.src_file = NULL; 586 1.1.1.8 mrg prof_info.func_name = NULL; 587 1.1.1.8 mrg prof_info.line_no = -1; 588 1.1.1.8 mrg prof_info.end_line_no = -1; 589 1.1.1.8 mrg prof_info.func_line_no = -1; 590 1.1.1.8 mrg prof_info.func_end_line_no = -1; 591 1.1.1.8 mrg } 592 1.1.1.8 mrg acc_event_info update_event_info; 593 1.1.1.8 mrg if (profiling_p) 594 1.1.1.8 mrg { 595 1.1.1.8 mrg update_event_info.other_event.event_type 596 1.1.1.8 mrg = prof_info.event_type; 597 1.1.1.8 mrg update_event_info.other_event.valid_bytes 598 1.1.1.8 mrg = _ACC_OTHER_EVENT_INFO_VALID_BYTES; 599 1.1.1.8 mrg update_event_info.other_event.parent_construct = acc_construct_update; 600 1.1.1.8 mrg update_event_info.other_event.implicit = 0; 601 1.1.1.8 mrg update_event_info.other_event.tool_info = NULL; 602 1.1.1.8 mrg } 603 1.1.1.8 mrg acc_api_info api_info; 604 1.1.1.8 mrg if (profiling_p) 605 1.1.1.8 mrg { 606 1.1.1.8 mrg thr->api_info = &api_info; 607 1.1.1.8 mrg 608 1.1.1.8 mrg api_info.device_api = acc_device_api_none; 609 1.1.1.8 mrg api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; 610 1.1.1.8 mrg api_info.device_type = prof_info.device_type; 611 1.1.1.8 mrg api_info.vendor = -1; 612 1.1.1.8 mrg api_info.device_handle = NULL; 613 1.1.1.8 mrg api_info.context_handle = NULL; 614 1.1.1.8 mrg api_info.async_handle = NULL; 615 1.1.1.8 mrg } 616 1.1.1.8 mrg 617 1.1.1.8 mrg if (profiling_p) 618 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); 619 1.1.1.8 mrg 620 1.1 mrg if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 621 1.1.1.7 mrg || (flags & GOACC_FLAG_HOST_FALLBACK)) 622 1.1.1.8 mrg { 623 1.1.1.8 mrg prof_info.device_type = acc_device_host; 624 1.1.1.8 mrg api_info.device_type = prof_info.device_type; 625 1.1.1.8 mrg 626 1.1.1.8 mrg goto out_prof; 627 1.1.1.8 mrg } 628 1.1 mrg 629 1.1.1.2 mrg if (num_waits) 630 1.1 mrg { 631 1.1 mrg va_list ap; 632 1.1 mrg 633 1.1 mrg va_start (ap, num_waits); 634 1.1.1.2 mrg goacc_wait (async, num_waits, &ap); 635 1.1 mrg va_end (ap); 636 1.1 mrg } 637 1.1 mrg 638 1.1.1.7 mrg bool update_device = false; 639 1.1 mrg for (i = 0; i < mapnum; ++i) 640 1.1 mrg { 641 1.1 mrg unsigned char kind = kinds[i] & 0xff; 642 1.1 mrg 643 1.1 mrg switch (kind) 644 1.1 mrg { 645 1.1 mrg case GOMP_MAP_POINTER: 646 1.1 mrg case GOMP_MAP_TO_PSET: 647 1.1 mrg break; 648 1.1 mrg 649 1.1.1.7 mrg case GOMP_MAP_ALWAYS_POINTER: 650 1.1.1.7 mrg if (update_device) 651 1.1.1.7 mrg { 652 1.1.1.7 mrg /* Save the contents of the host pointer. */ 653 1.1.1.7 mrg void *dptr = acc_deviceptr (hostaddrs[i-1]); 654 1.1.1.7 mrg uintptr_t t = *(uintptr_t *) hostaddrs[i]; 655 1.1.1.7 mrg 656 1.1.1.7 mrg /* Update the contents of the host pointer to reflect 657 1.1.1.7 mrg the value of the allocated device memory in the 658 1.1.1.7 mrg previous pointer. */ 659 1.1.1.7 mrg *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; 660 1.1.1.8 mrg /* TODO: verify that we really cannot use acc_update_device_async 661 1.1.1.8 mrg here. */ 662 1.1.1.7 mrg acc_update_device (hostaddrs[i], sizeof (uintptr_t)); 663 1.1.1.7 mrg 664 1.1.1.7 mrg /* Restore the host pointer. */ 665 1.1.1.7 mrg *(uintptr_t *) hostaddrs[i] = t; 666 1.1.1.7 mrg update_device = false; 667 1.1.1.7 mrg } 668 1.1.1.7 mrg break; 669 1.1.1.7 mrg 670 1.1.1.7 mrg case GOMP_MAP_TO: 671 1.1.1.7 mrg if (!acc_is_present (hostaddrs[i], sizes[i])) 672 1.1.1.7 mrg { 673 1.1.1.7 mrg update_device = false; 674 1.1.1.7 mrg break; 675 1.1.1.7 mrg } 676 1.1.1.7 mrg /* Fallthru */ 677 1.1 mrg case GOMP_MAP_FORCE_TO: 678 1.1.1.7 mrg update_device = true; 679 1.1.1.8 mrg acc_update_device_async (hostaddrs[i], sizes[i], async); 680 1.1 mrg break; 681 1.1 mrg 682 1.1.1.7 mrg case GOMP_MAP_FROM: 683 1.1.1.7 mrg if (!acc_is_present (hostaddrs[i], sizes[i])) 684 1.1.1.7 mrg { 685 1.1.1.7 mrg update_device = false; 686 1.1.1.7 mrg break; 687 1.1.1.7 mrg } 688 1.1.1.7 mrg /* Fallthru */ 689 1.1 mrg case GOMP_MAP_FORCE_FROM: 690 1.1.1.7 mrg update_device = false; 691 1.1.1.8 mrg acc_update_self_async (hostaddrs[i], sizes[i], async); 692 1.1 mrg break; 693 1.1 mrg 694 1.1 mrg default: 695 1.1 mrg gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); 696 1.1 mrg break; 697 1.1 mrg } 698 1.1 mrg } 699 1.1 mrg 700 1.1.1.8 mrg out_prof: 701 1.1.1.8 mrg if (profiling_p) 702 1.1.1.2 mrg { 703 1.1.1.8 mrg prof_info.event_type = acc_ev_update_end; 704 1.1.1.8 mrg update_event_info.other_event.event_type = prof_info.event_type; 705 1.1.1.8 mrg goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); 706 1.1 mrg 707 1.1.1.8 mrg thr->prof_info = NULL; 708 1.1.1.8 mrg thr->api_info = NULL; 709 1.1.1.2 mrg } 710 1.1 mrg } 711 1.1 mrg 712 1.1.1.8 mrg 713 1.1.1.8 mrg /* Legacy entry point (GCC 5). */ 714 1.1.1.8 mrg 715 1.1 mrg int 716 1.1 mrg GOACC_get_num_threads (void) 717 1.1 mrg { 718 1.1 mrg return 1; 719 1.1 mrg } 720 1.1 mrg 721 1.1.1.8 mrg /* Legacy entry point (GCC 5). */ 722 1.1.1.8 mrg 723 1.1 mrg int 724 1.1 mrg GOACC_get_thread_num (void) 725 1.1 mrg { 726 1.1 mrg return 0; 727 1.1 mrg } 728