team.c revision 1.7 1 1.7 mrg /* Copyright (C) 2015-2022 Free Software Foundation, Inc.
2 1.3 mrg Contributed by Alexander Monakov <amonakov (at) ispras.ru>
3 1.3 mrg
4 1.3 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.3 mrg (libgomp).
6 1.3 mrg
7 1.3 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.3 mrg under the terms of the GNU General Public License as published by
9 1.3 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.3 mrg any later version.
11 1.3 mrg
12 1.3 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.3 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.3 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.3 mrg more details.
16 1.3 mrg
17 1.3 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.3 mrg permissions described in the GCC Runtime Library Exception, version
19 1.3 mrg 3.1, as published by the Free Software Foundation.
20 1.3 mrg
21 1.3 mrg You should have received a copy of the GNU General Public License and
22 1.3 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.3 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.3 mrg <http://www.gnu.org/licenses/>. */
25 1.3 mrg
26 1.6 mrg /* This file handles maintenance of threads on NVPTX. */
27 1.3 mrg
28 1.3 mrg #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
29 1.3 mrg
30 1.3 mrg #include "libgomp.h"
31 1.3 mrg #include <stdlib.h>
32 1.3 mrg #include <string.h>
33 1.3 mrg
/* Array of per-thread state for the current team, one entry per CTA
   thread.  Placed in .shared memory ("shared" attribute) so that every
   thread in the CTA can reach it; "nocommon" forces a real definition
   instead of a common symbol.  It points into the master thread's
   soft stack (set up via alloca in gomp_nvptx_main).  */
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
/* Current team number, likewise CTA-shared; reset to 0 on target
   region entry in gomp_nvptx_main.  */
int __gomp_team_num __attribute__((shared,nocommon));

/* Idle loop entered by non-master threads after target region setup;
   defined below.  */
static void gomp_thread_start (struct gomp_thread_pool *);
38 1.3 mrg
39 1.3 mrg
40 1.3 mrg /* This externally visible function handles target region entry. It
41 1.3 mrg sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
42 1.3 mrg in the master thread or gomp_thread_start in other threads.
43 1.3 mrg
44 1.3 mrg The name of this function is part of the interface with the compiler: for
45 1.3 mrg each target region, GCC emits a PTX .kernel function that sets up soft-stack
46 1.3 mrg and uniform-simt state and calls this function, passing in FN the original
47 1.3 mrg function outlined for the target region. */
48 1.3 mrg
void
gomp_nvptx_main (void (*fn) (void *), void *fn_data)
{
  int tid, ntids;
  /* Read this thread's index and the CTA's thread count from the PTX
     special registers %tid.y / %ntid.y; OpenMP threads are laid out
     along the Y dimension of the CTA.  */
  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
  asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
  if (tid == 0)
    {
      /* Master thread: the thread count is fixed at launch, so both the
	 nthreads and thread-limit ICVs are set to it.  */
      gomp_global_icv.nthreads_var = ntids;
      gomp_global_icv.thread_limit_var = ntids;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      __gomp_team_num = 0;
      /* Allocate per-thread state and the pool on the master's soft
	 stack; the .shared pointer nvptx_thrs publishes the array to
	 the other CTA threads.  */
      nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
      memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));

      struct gomp_thread_pool *pool = alloca (sizeof (*pool));
      pool->threads = alloca (ntids * sizeof (*pool->threads));
      for (tid = 0; tid < ntids; tid++)
	pool->threads[tid] = nvptx_thrs + tid;
      pool->threads_size = ntids;
      pool->threads_used = ntids;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, ntids);

      nvptx_thrs[0].thread_pool = pool;
      /* CTA-wide barrier: makes the pool setup above visible to the
	 other threads before they read nvptx_thrs[0].thread_pool.  */
      asm ("bar.sync 0;");
      /* Run the outlined target region in the master thread.  */
      fn (fn_data);

      gomp_free_thread (nvptx_thrs);
    }
  else
    {
      /* Non-master threads: wait for the master to finish pool setup,
	 then park in the idle loop until called up into a team.  */
      asm ("bar.sync 0;");
      gomp_thread_start (nvptx_thrs[0].thread_pool);
    }
}
88 1.3 mrg
89 1.3 mrg /* This function contains the idle loop in which a thread waits
90 1.3 mrg to be called up to become part of a team. */
91 1.3 mrg
static void
gomp_thread_start (struct gomp_thread_pool *pool)
{
  struct gomp_thread *thr = gomp_thread ();

  gomp_sem_init (&thr->release, 0);
  thr->thread_pool = pool;

  do
    {
      /* Park here until gomp_team_start releases the dock barrier.  */
      gomp_simple_barrier_wait (&pool->threads_dock);
      /* A wakeup with no work assigned (fn left NULL) just re-parks.  */
      if (!thr->fn)
	continue;
      /* Run the work assigned by gomp_team_start, then clear fn so the
	 next dock release without an assignment is treated as idle.  */
      thr->fn (thr->data);
      thr->fn = NULL;

      /* Save the task pointer before the team barrier: the barrier
	 completes the parallel region, after which thr->ts/thr->task
	 may be repurposed.  */
      struct gomp_task *task = thr->task;
      gomp_team_barrier_wait_final (&thr->ts.team->barrier);
      gomp_finish_task (task);
    }
  /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
     it can trash stack pointer R1 in loops lacking exit edges.  Add a cheap
     artificial exit that the driver would not be able to optimize out.
     (nvptx_thrs is always non-NULL here, so the loop never actually
     terminates; the condition exists only to give the loop an exit edge.)  */
  while (nvptx_thrs);
}
117 1.3 mrg
118 1.3 mrg /* Launch a team. */
119 1.3 mrg
/* Launch a team: install TEAM as the current team of the calling
   (master) thread, initialize implicit tasks, and release NTHREADS-1
   idle pool threads to run FN (DATA).  FLAGS is accepted for interface
   compatibility with the generic implementation but is unused here.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team,
		 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  struct gomp_thread_pool *pool;
  unsigned long nthreads_var;

  thr = gomp_thread ();
  pool = thr->thread_pool;
  task = thr->task;
  /* Inherit ICVs from the enclosing task, or the globals outside any
     task context.  */
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  /* Master thread becomes team member 0.  */
  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
  thr->ts.single_count = 0;
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  /* Read nthreads_var before gomp_init_task copies *icv, then restore
     it into the implicit task's ICV set.  */
  nthreads_var = icv->nthreads_var;
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].taskgroup = taskgroup;

  if (nthreads == 1)
    return;

  /* Release existing idle threads.  All per-thread fields must be fully
     initialized BEFORE the dock barrier below, since the threads start
     reading them as soon as they are released.  */
  for (unsigned i = 1; i < nthreads; ++i)
    {
      nthr = pool->threads[i];
      nthr->ts.team = team;
      nthr->ts.work_share = &team->work_shares[0];
      nthr->ts.last_work_share = NULL;
      nthr->ts.team_id = i;
      nthr->ts.level = team->prev_ts.level + 1;
      nthr->ts.active_level = thr->ts.active_level;
      nthr->ts.single_count = 0;
      nthr->ts.static_trip = 0;
      nthr->task = &team->implicit_task[i];
      gomp_init_task (nthr->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].taskgroup = taskgroup;
      /* Setting fn is what marks this wakeup as "has work" in
	 gomp_thread_start.  */
      nthr->fn = fn;
      nthr->data = data;
      team->ordered_release[i] = &nthr->release;
    }

  /* Wake the parked threads in gomp_thread_start.  */
  gomp_simple_barrier_wait (&pool->threads_dock);
}
182 1.3 mrg
/* omp_pause_resource support: pausing resources is not implemented on
   this target, so report failure unconditionally.  Returns -1.  */

int
gomp_pause_host (void)
{
  /* Not supported on NVPTX.  */
  return -1;
}
188 1.5 mrg
189 1.3 mrg #include "../../team.c"
190 1.3 mrg #endif
191