team.c revision 1.1.1.1.4.3 1 1.1.1.1.4.3 martin /* Copyright (C) 2015-2017 Free Software Foundation, Inc.
2 1.1.1.1.4.3 martin Contributed by Alexander Monakov <amonakov (at) ispras.ru>
3 1.1.1.1.4.3 martin
4 1.1.1.1.4.3 martin This file is part of the GNU Offloading and Multi Processing Library
5 1.1.1.1.4.3 martin (libgomp).
6 1.1.1.1.4.3 martin
7 1.1.1.1.4.3 martin Libgomp is free software; you can redistribute it and/or modify it
8 1.1.1.1.4.3 martin under the terms of the GNU General Public License as published by
9 1.1.1.1.4.3 martin the Free Software Foundation; either version 3, or (at your option)
10 1.1.1.1.4.3 martin any later version.
11 1.1.1.1.4.3 martin
12 1.1.1.1.4.3 martin Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1.1.1.4.3 martin WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1.1.1.4.3 martin FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1.1.1.4.3 martin more details.
16 1.1.1.1.4.3 martin
17 1.1.1.1.4.3 martin Under Section 7 of GPL version 3, you are granted additional
18 1.1.1.1.4.3 martin permissions described in the GCC Runtime Library Exception, version
19 1.1.1.1.4.3 martin 3.1, as published by the Free Software Foundation.
20 1.1.1.1.4.3 martin
21 1.1.1.1.4.3 martin You should have received a copy of the GNU General Public License and
22 1.1.1.1.4.3 martin a copy of the GCC Runtime Library Exception along with this program;
23 1.1.1.1.4.3 martin see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1.1.1.4.3 martin <http://www.gnu.org/licenses/>. */
25 1.1.1.1.4.3 martin
26 1.1.1.1.4.3 martin /* This file handles maintenance of threads on NVPTX. */
27 1.1.1.1.4.3 martin
28 1.1.1.1.4.3 martin #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
29 1.1.1.1.4.3 martin
30 1.1.1.1.4.3 martin #include "libgomp.h"
31 1.1.1.1.4.3 martin #include <stdlib.h>
32 1.1.1.1.4.3 martin #include <string.h>
33 1.1.1.1.4.3 martin
/* Base of the array of per-thread gomp_thread descriptors.  The pointer
   variable itself is declared with the `shared' attribute.  It is assigned
   in gomp_nvptx_main by the thread whose tid is 0 and is read by the other
   threads after the barrier there; gomp_thread_start also uses it as its
   loop-exit condition.  */
34 1.1.1.1.4.3 martin struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
35 1.1.1.1.4.3 martin
/* Forward declaration: the definition follows gomp_nvptx_main, which
   calls it.  */
36 1.1.1.1.4.3 martin static void gomp_thread_start (struct gomp_thread_pool *);
37 1.1.1.1.4.3 martin
38 1.1.1.1.4.3 martin
39 1.1.1.1.4.3 martin /* This externally visible function handles target region entry. It
40 1.1.1.1.4.3 martin sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
41 1.1.1.1.4.3 martin in the master thread or gomp_thread_start in other threads.
42 1.1.1.1.4.3 martin
43 1.1.1.1.4.3 martin The name of this function is part of the interface with the compiler: for
44 1.1.1.1.4.3 martin each target region, GCC emits a PTX .kernel function that sets up soft-stack
45 1.1.1.1.4.3 martin and uniform-simt state and calls this function, passing in FN the original
46 1.1.1.1.4.3 martin function outlined for the target region. */
47 1.1.1.1.4.3 martin
48 1.1.1.1.4.3 martin void
49 1.1.1.1.4.3 martin gomp_nvptx_main (void (*fn) (void *), void *fn_data)
50 1.1.1.1.4.3 martin {
51 1.1.1.1.4.3 martin int tid, ntids;
52 1.1.1.1.4.3 martin asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
53 1.1.1.1.4.3 martin asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
54 1.1.1.1.4.3 martin if (tid == 0)
55 1.1.1.1.4.3 martin {
56 1.1.1.1.4.3 martin gomp_global_icv.nthreads_var = ntids;
57 1.1.1.1.4.3 martin /* Starting additional threads is not supported. */
58 1.1.1.1.4.3 martin gomp_global_icv.dyn_var = true;
59 1.1.1.1.4.3 martin
60 1.1.1.1.4.3 martin nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
61 1.1.1.1.4.3 martin memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
62 1.1.1.1.4.3 martin
63 1.1.1.1.4.3 martin struct gomp_thread_pool *pool = alloca (sizeof (*pool));
64 1.1.1.1.4.3 martin pool->threads = alloca (ntids * sizeof (*pool->threads));
65 1.1.1.1.4.3 martin for (tid = 0; tid < ntids; tid++)
66 1.1.1.1.4.3 martin pool->threads[tid] = nvptx_thrs + tid;
67 1.1.1.1.4.3 martin pool->threads_size = ntids;
68 1.1.1.1.4.3 martin pool->threads_used = ntids;
69 1.1.1.1.4.3 martin pool->threads_busy = 1;
70 1.1.1.1.4.3 martin pool->last_team = NULL;
71 1.1.1.1.4.3 martin gomp_simple_barrier_init (&pool->threads_dock, ntids);
72 1.1.1.1.4.3 martin
73 1.1.1.1.4.3 martin nvptx_thrs[0].thread_pool = pool;
74 1.1.1.1.4.3 martin asm ("bar.sync 0;");
75 1.1.1.1.4.3 martin fn (fn_data);
76 1.1.1.1.4.3 martin
77 1.1.1.1.4.3 martin gomp_free_thread (nvptx_thrs);
78 1.1.1.1.4.3 martin }
79 1.1.1.1.4.3 martin else
80 1.1.1.1.4.3 martin {
81 1.1.1.1.4.3 martin asm ("bar.sync 0;");
82 1.1.1.1.4.3 martin gomp_thread_start (nvptx_thrs[0].thread_pool);
83 1.1.1.1.4.3 martin }
84 1.1.1.1.4.3 martin }
85 1.1.1.1.4.3 martin
86 1.1.1.1.4.3 martin /* This function contains the idle loop in which a thread waits
87 1.1.1.1.4.3 martin to be called up to become part of a team. */
88 1.1.1.1.4.3 martin
89 1.1.1.1.4.3 martin static void
90 1.1.1.1.4.3 martin gomp_thread_start (struct gomp_thread_pool *pool)
91 1.1.1.1.4.3 martin {
92 1.1.1.1.4.3 martin struct gomp_thread *thr = gomp_thread ();
93 1.1.1.1.4.3 martin
94 1.1.1.1.4.3 martin gomp_sem_init (&thr->release, 0);
95 1.1.1.1.4.3 martin thr->thread_pool = pool;
96 1.1.1.1.4.3 martin
97 1.1.1.1.4.3 martin do
98 1.1.1.1.4.3 martin {
99 1.1.1.1.4.3 martin gomp_simple_barrier_wait (&pool->threads_dock);
100 1.1.1.1.4.3 martin if (!thr->fn)
101 1.1.1.1.4.3 martin continue;
102 1.1.1.1.4.3 martin thr->fn (thr->data);
103 1.1.1.1.4.3 martin thr->fn = NULL;
104 1.1.1.1.4.3 martin
105 1.1.1.1.4.3 martin struct gomp_task *task = thr->task;
106 1.1.1.1.4.3 martin gomp_team_barrier_wait_final (&thr->ts.team->barrier);
107 1.1.1.1.4.3 martin gomp_finish_task (task);
108 1.1.1.1.4.3 martin }
109 1.1.1.1.4.3 martin /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
110 1.1.1.1.4.3 martin it can trash stack pointer R1 in loops lacking exit edges. Add a cheap
111 1.1.1.1.4.3 martin artificial exit that the driver would not be able to optimize out. */
112 1.1.1.1.4.3 martin while (nvptx_thrs);
113 1.1.1.1.4.3 martin }
114 1.1.1.1.4.3 martin
115 1.1.1.1.4.3 martin /* Launch a team. */
116 1.1.1.1.4.3 martin
117 1.1.1.1.4.3 martin void
118 1.1.1.1.4.3 martin gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
119 1.1.1.1.4.3 martin unsigned flags, struct gomp_team *team)
120 1.1.1.1.4.3 martin {
121 1.1.1.1.4.3 martin struct gomp_thread *thr, *nthr;
122 1.1.1.1.4.3 martin struct gomp_task *task;
123 1.1.1.1.4.3 martin struct gomp_task_icv *icv;
124 1.1.1.1.4.3 martin struct gomp_thread_pool *pool;
125 1.1.1.1.4.3 martin unsigned long nthreads_var;
126 1.1.1.1.4.3 martin
127 1.1.1.1.4.3 martin thr = gomp_thread ();
128 1.1.1.1.4.3 martin pool = thr->thread_pool;
129 1.1.1.1.4.3 martin task = thr->task;
130 1.1.1.1.4.3 martin icv = task ? &task->icv : &gomp_global_icv;
131 1.1.1.1.4.3 martin
132 1.1.1.1.4.3 martin /* Always save the previous state, even if this isn't a nested team.
133 1.1.1.1.4.3 martin In particular, we should save any work share state from an outer
134 1.1.1.1.4.3 martin orphaned work share construct. */
135 1.1.1.1.4.3 martin team->prev_ts = thr->ts;
136 1.1.1.1.4.3 martin
137 1.1.1.1.4.3 martin thr->ts.team = team;
138 1.1.1.1.4.3 martin thr->ts.team_id = 0;
139 1.1.1.1.4.3 martin ++thr->ts.level;
140 1.1.1.1.4.3 martin if (nthreads > 1)
141 1.1.1.1.4.3 martin ++thr->ts.active_level;
142 1.1.1.1.4.3 martin thr->ts.work_share = &team->work_shares[0];
143 1.1.1.1.4.3 martin thr->ts.last_work_share = NULL;
144 1.1.1.1.4.3 martin thr->ts.single_count = 0;
145 1.1.1.1.4.3 martin thr->ts.static_trip = 0;
146 1.1.1.1.4.3 martin thr->task = &team->implicit_task[0];
147 1.1.1.1.4.3 martin nthreads_var = icv->nthreads_var;
148 1.1.1.1.4.3 martin gomp_init_task (thr->task, task, icv);
149 1.1.1.1.4.3 martin team->implicit_task[0].icv.nthreads_var = nthreads_var;
150 1.1.1.1.4.3 martin
151 1.1.1.1.4.3 martin if (nthreads == 1)
152 1.1.1.1.4.3 martin return;
153 1.1.1.1.4.3 martin
154 1.1.1.1.4.3 martin /* Release existing idle threads. */
155 1.1.1.1.4.3 martin for (unsigned i = 1; i < nthreads; ++i)
156 1.1.1.1.4.3 martin {
157 1.1.1.1.4.3 martin nthr = pool->threads[i];
158 1.1.1.1.4.3 martin nthr->ts.team = team;
159 1.1.1.1.4.3 martin nthr->ts.work_share = &team->work_shares[0];
160 1.1.1.1.4.3 martin nthr->ts.last_work_share = NULL;
161 1.1.1.1.4.3 martin nthr->ts.team_id = i;
162 1.1.1.1.4.3 martin nthr->ts.level = team->prev_ts.level + 1;
163 1.1.1.1.4.3 martin nthr->ts.active_level = thr->ts.active_level;
164 1.1.1.1.4.3 martin nthr->ts.single_count = 0;
165 1.1.1.1.4.3 martin nthr->ts.static_trip = 0;
166 1.1.1.1.4.3 martin nthr->task = &team->implicit_task[i];
167 1.1.1.1.4.3 martin gomp_init_task (nthr->task, task, icv);
168 1.1.1.1.4.3 martin team->implicit_task[i].icv.nthreads_var = nthreads_var;
169 1.1.1.1.4.3 martin nthr->fn = fn;
170 1.1.1.1.4.3 martin nthr->data = data;
171 1.1.1.1.4.3 martin team->ordered_release[i] = &nthr->release;
172 1.1.1.1.4.3 martin }
173 1.1.1.1.4.3 martin
174 1.1.1.1.4.3 martin gomp_simple_barrier_wait (&pool->threads_dock);
175 1.1.1.1.4.3 martin }
176 1.1.1.1.4.3 martin
177 1.1.1.1.4.3 martin #include "../../team.c"
178 1.1.1.1.4.3 martin #endif
179