/* Copyright (C) 2015-2022 Free Software Foundation, Inc.
   Contributed by Alexander Monakov <amonakov (at) ispras.ru>

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles maintenance of threads on NVPTX.  */

#if defined __nvptx_softstack__ && defined __nvptx_unisimt__

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* Array of per-thread state, one element per thread of the team.  It is
   allocated and zero-filled by the master thread in gomp_nvptx_main and
   declared with the "shared" attribute.  */
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));

/* Team number; reset to 0 by the master thread in gomp_nvptx_main.  */
int __gomp_team_num __attribute__((shared,nocommon));

static void gomp_thread_start (struct gomp_thread_pool *);


/* This externally visible function handles target region entry.  It
   sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
   in the master thread or gomp_thread_start in other threads.

   The name of this function is part of the interface with the compiler: for
   each target region, GCC emits a PTX .kernel function that sets up soft-stack
   and uniform-simt state and calls this function, passing in FN the original
   function outlined for the target region.  */

void
gomp_nvptx_main (void (*fn) (void *), void *fn_data)
{
  int tid, ntids;

  /* This thread's index and the thread count, both taken from the Y
     dimension (%tid.y and %ntid.y).  */
  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
  asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
  if (tid == 0)
    {
      /* Master thread: publish the thread count through the global ICVs.  */
      gomp_global_icv.nthreads_var = ntids;
      gomp_global_icv.thread_limit_var = ntids;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      __gomp_team_num = 0;

      /* The thread array, the pool and its thread table are all obtained
         with alloca, so they live in this function's frame.  */
      nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
      memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));

      struct gomp_thread_pool *pool = alloca (sizeof (*pool));
      pool->threads = alloca (ntids * sizeof (*pool->threads));
      /* Slot I of the pool refers to element I of nvptx_thrs.  TID is
         reused as the loop counter; its original value is not needed
         again in this branch.  */
      for (tid = 0; tid < ntids; tid++)
        pool->threads[tid] = nvptx_thrs + tid;
      pool->threads_size = ntids;
      pool->threads_used = ntids;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, ntids);

      nvptx_thrs[0].thread_pool = pool;
      /* Matches the bar.sync in the else branch: the other threads read
         nvptx_thrs[0].thread_pool only after executing it.  */
      asm ("bar.sync 0;");
      fn (fn_data);

      gomp_free_thread (nvptx_thrs);
    }
  else
    {
      /* Non-master thread: execute the barrier first, then pick up the
         pool stored by the master and enter the idle loop.  */
      asm ("bar.sync 0;");
      gomp_thread_start (nvptx_thrs[0].thread_pool);
    }
}

/* This function contains the idle loop in which a thread waits
   to be called up to become part of a team.  POOL is the thread pool
   created by the master thread in gomp_nvptx_main.  */

static void
gomp_thread_start (struct gomp_thread_pool *pool)
{
  struct gomp_thread *thr = gomp_thread ();

  gomp_sem_init (&thr->release, 0);
  thr->thread_pool = pool;

  do
    {
      /* Dock here until gomp_team_start waits on the same barrier.  */
      gomp_simple_barrier_wait (&pool->threads_dock);
      /* No function was assigned to this thread: go back to the dock.
         Note that "continue" still evaluates the loop condition below.  */
      if (!thr->fn)
        continue;
      thr->fn (thr->data);
      thr->fn = NULL;

      /* Fetch the task before waiting on the team's final barrier, then
         finish it once the barrier has been passed.  */
      struct gomp_task *task = thr->task;
      gomp_team_barrier_wait_final (&thr->ts.team->barrier);
      gomp_finish_task (task);
    }
  /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
     it can trash stack pointer R1 in loops lacking exit edges.  Add a cheap
     artificial exit that the driver would not be able to optimize out.  */
  while (nvptx_thrs);
}

/* Launch a team.

   FN and DATA are the function and argument handed to every thread other
   than the calling one.  NTHREADS is the team size.  FLAGS is not used by
   this implementation.  TEAM is the team being started and TASKGROUP is
   stored in each implicit task of the team.  The calling thread becomes
   team member 0; FN is not called here for it.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  struct gomp_thread_pool *pool;
  unsigned long nthreads_var;

  thr = gomp_thread ();
  pool = thr->thread_pool;
  task = thr->task;
  /* Use the current task's ICVs when there is one, else the global ones.  */
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  /* Set up the calling thread as member 0 of the new team.  */
  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
  thr->ts.single_count = 0;
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  /* Read nthreads_var before gomp_init_task and store it into the new
     implicit task's ICVs afterwards.  */
  nthreads_var = icv->nthreads_var;
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].taskgroup = taskgroup;

  /* A team of one has no other threads to set up or release.  */
  if (nthreads == 1)
    return;

  /* Release existing idle threads.  */
  for (unsigned i = 1; i < nthreads; ++i)
    {
      nthr = pool->threads[i];
      nthr->ts.team = team;
      nthr->ts.work_share = &team->work_shares[0];
      nthr->ts.last_work_share = NULL;
      nthr->ts.team_id = i;
      nthr->ts.level = team->prev_ts.level + 1;
      nthr->ts.active_level = thr->ts.active_level;
      nthr->ts.single_count = 0;
      nthr->ts.static_trip = 0;
      nthr->task = &team->implicit_task[i];
      gomp_init_task (nthr->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].taskgroup = taskgroup;
      nthr->fn = fn;
      nthr->data = data;
      team->ordered_release[i] = &nthr->release;
    }

  /* Join the dock barrier that the idle threads wait on in
     gomp_thread_start; they then find their FN set and start running.  */
  gomp_simple_barrier_wait (&pool->threads_dock);
}

/* Always returns -1.
   NOTE(review): presumably this reports that pausing is not supported on
   this target -- confirm against the callers.  */

int
gomp_pause_host (void)
{
  return -1;
}

#include "../../team.c"
#endif