1 1.1.1.9 mrg /* Copyright (C) 2013-2024 Free Software Foundation, Inc. 2 1.1.1.2 mrg Contributed by Jakub Jelinek <jakub (at) redhat.com>. 3 1.1.1.2 mrg 4 1.1.1.2 mrg This file is part of the GNU Offloading and Multi Processing Library 5 1.1.1.2 mrg (libgomp). 6 1.1.1.2 mrg 7 1.1.1.2 mrg Libgomp is free software; you can redistribute it and/or modify it 8 1.1.1.2 mrg under the terms of the GNU General Public License as published by 9 1.1.1.2 mrg the Free Software Foundation; either version 3, or (at your option) 10 1.1.1.2 mrg any later version. 11 1.1.1.2 mrg 12 1.1.1.2 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 1.1.1.2 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 1.1.1.2 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 1.1.1.2 mrg more details. 16 1.1.1.2 mrg 17 1.1.1.2 mrg Under Section 7 of GPL version 3, you are granted additional 18 1.1.1.2 mrg permissions described in the GCC Runtime Library Exception, version 19 1.1.1.2 mrg 3.1, as published by the Free Software Foundation. 20 1.1.1.2 mrg 21 1.1.1.2 mrg You should have received a copy of the GNU General Public License and 22 1.1.1.2 mrg a copy of the GCC Runtime Library Exception along with this program; 23 1.1.1.2 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 1.1.1.2 mrg <http://www.gnu.org/licenses/>. */ 25 1.1.1.2 mrg 26 1.1.1.2 mrg #include "libgomp.h" 27 1.1.1.9 mrg #include "libgomp-nvptx.h" /* For struct rev_offload + GOMP_REV_OFFLOAD_VAR. 
*/ 28 1.1.1.2 mrg #include <limits.h> 29 1.1.1.2 mrg 30 1.1.1.8 mrg extern int __gomp_team_num __attribute__((shared)); 31 1.1.1.9 mrg extern volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; 32 1.1.1.9 mrg volatile struct rev_offload *GOMP_REV_OFFLOAD_VAR; 33 1.1.1.8 mrg 34 1.1.1.8 mrg bool 35 1.1.1.8 mrg GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, 36 1.1.1.8 mrg unsigned int thread_limit, bool first) 37 1.1.1.2 mrg { 38 1.1.1.8 mrg unsigned int num_blocks, block_id; 39 1.1.1.8 mrg asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks)); 40 1.1.1.8 mrg if (!first) 41 1.1.1.8 mrg { 42 1.1.1.8 mrg unsigned int team_num; 43 1.1.1.8 mrg if (num_blocks > gomp_num_teams_var) 44 1.1.1.8 mrg return false; 45 1.1.1.8 mrg team_num = __gomp_team_num; 46 1.1.1.8 mrg if (team_num > gomp_num_teams_var - num_blocks) 47 1.1.1.8 mrg return false; 48 1.1.1.8 mrg __gomp_team_num = team_num + num_blocks; 49 1.1.1.8 mrg return true; 50 1.1.1.8 mrg } 51 1.1.1.2 mrg if (thread_limit) 52 1.1.1.2 mrg { 53 1.1.1.2 mrg struct gomp_task_icv *icv = gomp_icv (true); 54 1.1.1.2 mrg icv->thread_limit_var 55 1.1.1.2 mrg = thread_limit > INT_MAX ? UINT_MAX : thread_limit; 56 1.1.1.2 mrg } 57 1.1.1.8 mrg if (!num_teams_upper) 58 1.1.1.9 mrg num_teams_upper = ((GOMP_ADDITIONAL_ICVS.nteams > 0 59 1.1.1.9 mrg && num_blocks > GOMP_ADDITIONAL_ICVS.nteams) 60 1.1.1.9 mrg ? 
GOMP_ADDITIONAL_ICVS.nteams : num_blocks); 61 1.1.1.8 mrg else if (num_blocks < num_teams_lower) 62 1.1.1.8 mrg num_teams_upper = num_teams_lower; 63 1.1.1.8 mrg else if (num_blocks < num_teams_upper) 64 1.1.1.8 mrg num_teams_upper = num_blocks; 65 1.1.1.2 mrg asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id)); 66 1.1.1.8 mrg if (block_id >= num_teams_upper) 67 1.1.1.8 mrg return false; 68 1.1.1.8 mrg __gomp_team_num = block_id; 69 1.1.1.8 mrg gomp_num_teams_var = num_teams_upper - 1; 70 1.1.1.8 mrg return true; 71 1.1.1.2 mrg } 72 1.1.1.6 mrg 73 1.1.1.6 mrg int 74 1.1.1.6 mrg omp_pause_resource (omp_pause_resource_t kind, int device_num) 75 1.1.1.6 mrg { 76 1.1.1.6 mrg (void) kind; 77 1.1.1.6 mrg (void) device_num; 78 1.1.1.6 mrg return -1; 79 1.1.1.6 mrg } 80 1.1.1.6 mrg 81 1.1.1.6 mrg int 82 1.1.1.6 mrg omp_pause_resource_all (omp_pause_resource_t kind) 83 1.1.1.6 mrg { 84 1.1.1.6 mrg (void) kind; 85 1.1.1.6 mrg return -1; 86 1.1.1.6 mrg } 87 1.1.1.6 mrg 88 1.1.1.6 mrg ialias (omp_pause_resource) 89 1.1.1.6 mrg ialias (omp_pause_resource_all) 90 1.1.1.8 mrg 91 1.1.1.8 mrg void 92 1.1.1.8 mrg GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, 93 1.1.1.8 mrg void **hostaddrs, size_t *sizes, unsigned short *kinds, 94 1.1.1.8 mrg unsigned int flags, void **depend, void **args) 95 1.1.1.8 mrg { 96 1.1.1.9 mrg static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */ 97 1.1.1.8 mrg (void) flags; 98 1.1.1.8 mrg (void) depend; 99 1.1.1.8 mrg (void) args; 100 1.1.1.9 mrg 101 1.1.1.9 mrg if (device != GOMP_DEVICE_HOST_FALLBACK 102 1.1.1.9 mrg || fn == NULL 103 1.1.1.9 mrg || GOMP_REV_OFFLOAD_VAR == NULL) 104 1.1.1.9 mrg return; 105 1.1.1.9 mrg 106 1.1.1.9 mrg gomp_mutex_lock (&lock); 107 1.1.1.9 mrg 108 1.1.1.9 mrg GOMP_REV_OFFLOAD_VAR->mapnum = mapnum; 109 1.1.1.9 mrg GOMP_REV_OFFLOAD_VAR->addrs = (uint64_t) hostaddrs; 110 1.1.1.9 mrg GOMP_REV_OFFLOAD_VAR->sizes = (uint64_t) sizes; 111 1.1.1.9 mrg GOMP_REV_OFFLOAD_VAR->kinds = (uint64_t) 
kinds; 112 1.1.1.9 mrg GOMP_REV_OFFLOAD_VAR->dev_num = GOMP_ADDITIONAL_ICVS.device_num; 113 1.1.1.9 mrg 114 1.1.1.9 mrg /* Set 'fn' to trigger processing on the host; wait for completion, 115 1.1.1.9 mrg which is flagged by setting 'fn' back to 0 on the host. */ 116 1.1.1.9 mrg uint64_t addr_struct_fn = (uint64_t) &GOMP_REV_OFFLOAD_VAR->fn; 117 1.1.1.9 mrg #if __PTX_SM__ >= 700 118 1.1.1.9 mrg asm volatile ("st.global.release.sys.u64 [%0], %1;" 119 1.1.1.9 mrg : : "r"(addr_struct_fn), "r" (fn) : "memory"); 120 1.1.1.9 mrg #else 121 1.1.1.9 mrg __sync_synchronize (); /* membar.sys */ 122 1.1.1.9 mrg asm volatile ("st.volatile.global.u64 [%0], %1;" 123 1.1.1.9 mrg : : "r"(addr_struct_fn), "r" (fn) : "memory"); 124 1.1.1.9 mrg #endif 125 1.1.1.9 mrg 126 1.1.1.9 mrg #if __PTX_SM__ >= 700 127 1.1.1.9 mrg uint64_t fn2; 128 1.1.1.9 mrg do 129 1.1.1.9 mrg { 130 1.1.1.9 mrg asm volatile ("ld.acquire.sys.global.u64 %0, [%1];" 131 1.1.1.9 mrg : "=r" (fn2) : "r" (addr_struct_fn) : "memory"); 132 1.1.1.9 mrg } 133 1.1.1.9 mrg while (fn2 != 0); 134 1.1.1.9 mrg #else 135 1.1.1.9 mrg /* ld.global.u64 %r64,[__gomp_rev_offload_var]; 136 1.1.1.9 mrg ld.u64 %r36,[%r64]; 137 1.1.1.9 mrg membar.sys; */ 138 1.1.1.9 mrg while (__atomic_load_n (&GOMP_REV_OFFLOAD_VAR->fn, __ATOMIC_ACQUIRE) != 0) 139 1.1.1.9 mrg ; /* spin */ 140 1.1.1.9 mrg #endif 141 1.1.1.9 mrg 142 1.1.1.9 mrg gomp_mutex_unlock (&lock); 143 1.1.1.8 mrg } 144 1.1.1.8 mrg 145 1.1.1.8 mrg void 146 1.1.1.8 mrg GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, 147 1.1.1.8 mrg size_t *sizes, unsigned short *kinds) 148 1.1.1.8 mrg { 149 1.1.1.8 mrg (void) device; 150 1.1.1.8 mrg (void) mapnum; 151 1.1.1.8 mrg (void) hostaddrs; 152 1.1.1.8 mrg (void) sizes; 153 1.1.1.8 mrg (void) kinds; 154 1.1.1.8 mrg __builtin_unreachable (); 155 1.1.1.8 mrg } 156 1.1.1.8 mrg 157 1.1.1.8 mrg void 158 1.1.1.8 mrg GOMP_target_end_data (void) 159 1.1.1.8 mrg { 160 1.1.1.8 mrg __builtin_unreachable (); 161 1.1.1.8 mrg } 162 1.1.1.8 
/* Stub: 'target update' never occurs inside code offloaded to this
   target, so the call is treated as unreachable.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  (void) device;
  (void) mapnum;
  (void) hostaddrs;
  (void) sizes;
  (void) kinds;
  (void) flags;
  (void) depend;
  __builtin_unreachable ();
}

/* Stub: likewise for 'target enter data' / 'target exit data'.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  (void) device;
  (void) mapnum;
  (void) hostaddrs;
  (void) sizes;
  (void) kinds;
  (void) flags;
  (void) depend;
  __builtin_unreachable ();
}