team.c revision 1.1.1.3 1 1.1.1.3 mrg /* Copyright (C) 2015-2018 Free Software Foundation, Inc.
2 1.1.1.2 mrg Contributed by Alexander Monakov <amonakov (at) ispras.ru>
3 1.1.1.2 mrg
4 1.1.1.2 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.1.1.2 mrg (libgomp).
6 1.1.1.2 mrg
7 1.1.1.2 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.1.1.2 mrg under the terms of the GNU General Public License as published by
9 1.1.1.2 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1.1.2 mrg any later version.
11 1.1.1.2 mrg
12 1.1.1.2 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1.1.2 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1.1.2 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1.1.2 mrg more details.
16 1.1.1.2 mrg
17 1.1.1.2 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1.1.2 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1.1.2 mrg 3.1, as published by the Free Software Foundation.
20 1.1.1.2 mrg
21 1.1.1.2 mrg You should have received a copy of the GNU General Public License and
22 1.1.1.2 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1.1.2 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1.1.2 mrg <http://www.gnu.org/licenses/>. */
25 1.1.1.2 mrg
26 1.1.1.2 mrg /* This file handles maintenance of threads on NVPTX. */
27 1.1.1.2 mrg
28 1.1.1.2 mrg #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
29 1.1.1.2 mrg
30 1.1.1.2 mrg #include "libgomp.h"
31 1.1.1.2 mrg #include <stdlib.h>
32 1.1.1.2 mrg #include <string.h>
33 1.1.1.2 mrg
/* Base of the per-thread state array.  The master thread in
   gomp_nvptx_main points this at an alloca'd, zero-filled array holding
   one element per thread (%ntid.y of them); the other threads read it
   after the bar.sync that follows.  It also serves as the artificial
   loop-exit condition in gomp_thread_start.  */
struct gomp_thread *nvptx_thrs __attribute__ ((shared, nocommon));

/* Idle loop run by every non-master thread; defined further down.  */
static void gomp_thread_start (struct gomp_thread_pool *pool);
37 1.1.1.2 mrg
38 1.1.1.2 mrg
39 1.1.1.2 mrg /* This externally visible function handles target region entry. It
40 1.1.1.2 mrg sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
41 1.1.1.2 mrg in the master thread or gomp_thread_start in other threads.
42 1.1.1.2 mrg
43 1.1.1.2 mrg The name of this function is part of the interface with the compiler: for
44 1.1.1.2 mrg each target region, GCC emits a PTX .kernel function that sets up soft-stack
45 1.1.1.2 mrg and uniform-simt state and calls this function, passing in FN the original
46 1.1.1.2 mrg function outlined for the target region. */
47 1.1.1.2 mrg
48 1.1.1.2 mrg void
49 1.1.1.2 mrg gomp_nvptx_main (void (*fn) (void *), void *fn_data)
50 1.1.1.2 mrg {
51 1.1.1.2 mrg int tid, ntids;
52 1.1.1.2 mrg asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
53 1.1.1.2 mrg asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
54 1.1.1.2 mrg if (tid == 0)
55 1.1.1.2 mrg {
56 1.1.1.2 mrg gomp_global_icv.nthreads_var = ntids;
57 1.1.1.2 mrg /* Starting additional threads is not supported. */
58 1.1.1.2 mrg gomp_global_icv.dyn_var = true;
59 1.1.1.2 mrg
60 1.1.1.2 mrg nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
61 1.1.1.2 mrg memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
62 1.1.1.2 mrg
63 1.1.1.2 mrg struct gomp_thread_pool *pool = alloca (sizeof (*pool));
64 1.1.1.2 mrg pool->threads = alloca (ntids * sizeof (*pool->threads));
65 1.1.1.2 mrg for (tid = 0; tid < ntids; tid++)
66 1.1.1.2 mrg pool->threads[tid] = nvptx_thrs + tid;
67 1.1.1.2 mrg pool->threads_size = ntids;
68 1.1.1.2 mrg pool->threads_used = ntids;
69 1.1.1.2 mrg pool->threads_busy = 1;
70 1.1.1.2 mrg pool->last_team = NULL;
71 1.1.1.2 mrg gomp_simple_barrier_init (&pool->threads_dock, ntids);
72 1.1.1.2 mrg
73 1.1.1.2 mrg nvptx_thrs[0].thread_pool = pool;
74 1.1.1.2 mrg asm ("bar.sync 0;");
75 1.1.1.2 mrg fn (fn_data);
76 1.1.1.2 mrg
77 1.1.1.2 mrg gomp_free_thread (nvptx_thrs);
78 1.1.1.2 mrg }
79 1.1.1.2 mrg else
80 1.1.1.2 mrg {
81 1.1.1.2 mrg asm ("bar.sync 0;");
82 1.1.1.2 mrg gomp_thread_start (nvptx_thrs[0].thread_pool);
83 1.1.1.2 mrg }
84 1.1.1.2 mrg }
85 1.1.1.2 mrg
86 1.1.1.2 mrg /* This function contains the idle loop in which a thread waits
87 1.1.1.2 mrg to be called up to become part of a team. */
88 1.1.1.2 mrg
89 1.1.1.2 mrg static void
90 1.1.1.2 mrg gomp_thread_start (struct gomp_thread_pool *pool)
91 1.1.1.2 mrg {
92 1.1.1.2 mrg struct gomp_thread *thr = gomp_thread ();
93 1.1.1.2 mrg
94 1.1.1.2 mrg gomp_sem_init (&thr->release, 0);
95 1.1.1.2 mrg thr->thread_pool = pool;
96 1.1.1.2 mrg
97 1.1.1.2 mrg do
98 1.1.1.2 mrg {
99 1.1.1.2 mrg gomp_simple_barrier_wait (&pool->threads_dock);
100 1.1.1.2 mrg if (!thr->fn)
101 1.1.1.2 mrg continue;
102 1.1.1.2 mrg thr->fn (thr->data);
103 1.1.1.2 mrg thr->fn = NULL;
104 1.1.1.2 mrg
105 1.1.1.2 mrg struct gomp_task *task = thr->task;
106 1.1.1.2 mrg gomp_team_barrier_wait_final (&thr->ts.team->barrier);
107 1.1.1.2 mrg gomp_finish_task (task);
108 1.1.1.2 mrg }
109 1.1.1.2 mrg /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
110 1.1.1.2 mrg it can trash stack pointer R1 in loops lacking exit edges. Add a cheap
111 1.1.1.2 mrg artificial exit that the driver would not be able to optimize out. */
112 1.1.1.2 mrg while (nvptx_thrs);
113 1.1.1.2 mrg }
114 1.1.1.2 mrg
115 1.1.1.2 mrg /* Launch a team. */
116 1.1.1.2 mrg
117 1.1.1.2 mrg void
118 1.1.1.2 mrg gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
119 1.1.1.2 mrg unsigned flags, struct gomp_team *team)
120 1.1.1.2 mrg {
121 1.1.1.2 mrg struct gomp_thread *thr, *nthr;
122 1.1.1.2 mrg struct gomp_task *task;
123 1.1.1.2 mrg struct gomp_task_icv *icv;
124 1.1.1.2 mrg struct gomp_thread_pool *pool;
125 1.1.1.2 mrg unsigned long nthreads_var;
126 1.1.1.2 mrg
127 1.1.1.2 mrg thr = gomp_thread ();
128 1.1.1.2 mrg pool = thr->thread_pool;
129 1.1.1.2 mrg task = thr->task;
130 1.1.1.2 mrg icv = task ? &task->icv : &gomp_global_icv;
131 1.1.1.2 mrg
132 1.1.1.2 mrg /* Always save the previous state, even if this isn't a nested team.
133 1.1.1.2 mrg In particular, we should save any work share state from an outer
134 1.1.1.2 mrg orphaned work share construct. */
135 1.1.1.2 mrg team->prev_ts = thr->ts;
136 1.1.1.2 mrg
137 1.1.1.2 mrg thr->ts.team = team;
138 1.1.1.2 mrg thr->ts.team_id = 0;
139 1.1.1.2 mrg ++thr->ts.level;
140 1.1.1.2 mrg if (nthreads > 1)
141 1.1.1.2 mrg ++thr->ts.active_level;
142 1.1.1.2 mrg thr->ts.work_share = &team->work_shares[0];
143 1.1.1.2 mrg thr->ts.last_work_share = NULL;
144 1.1.1.2 mrg thr->ts.single_count = 0;
145 1.1.1.2 mrg thr->ts.static_trip = 0;
146 1.1.1.2 mrg thr->task = &team->implicit_task[0];
147 1.1.1.2 mrg nthreads_var = icv->nthreads_var;
148 1.1.1.2 mrg gomp_init_task (thr->task, task, icv);
149 1.1.1.2 mrg team->implicit_task[0].icv.nthreads_var = nthreads_var;
150 1.1.1.2 mrg
151 1.1.1.2 mrg if (nthreads == 1)
152 1.1.1.2 mrg return;
153 1.1.1.2 mrg
154 1.1.1.2 mrg /* Release existing idle threads. */
155 1.1.1.2 mrg for (unsigned i = 1; i < nthreads; ++i)
156 1.1.1.2 mrg {
157 1.1.1.2 mrg nthr = pool->threads[i];
158 1.1.1.2 mrg nthr->ts.team = team;
159 1.1.1.2 mrg nthr->ts.work_share = &team->work_shares[0];
160 1.1.1.2 mrg nthr->ts.last_work_share = NULL;
161 1.1.1.2 mrg nthr->ts.team_id = i;
162 1.1.1.2 mrg nthr->ts.level = team->prev_ts.level + 1;
163 1.1.1.2 mrg nthr->ts.active_level = thr->ts.active_level;
164 1.1.1.2 mrg nthr->ts.single_count = 0;
165 1.1.1.2 mrg nthr->ts.static_trip = 0;
166 1.1.1.2 mrg nthr->task = &team->implicit_task[i];
167 1.1.1.2 mrg gomp_init_task (nthr->task, task, icv);
168 1.1.1.2 mrg team->implicit_task[i].icv.nthreads_var = nthreads_var;
169 1.1.1.2 mrg nthr->fn = fn;
170 1.1.1.2 mrg nthr->data = data;
171 1.1.1.2 mrg team->ordered_release[i] = &nthr->release;
172 1.1.1.2 mrg }
173 1.1.1.2 mrg
174 1.1.1.2 mrg gomp_simple_barrier_wait (&pool->threads_dock);
175 1.1.1.2 mrg }
176 1.1.1.2 mrg
177 1.1.1.2 mrg #include "../../team.c"
178 1.1.1.2 mrg #endif
179