team.c revision 1.1.1.1.4.1 1 1.1.1.1.4.1 christos /* Copyright (C) 2015-2017 Free Software Foundation, Inc.
2 1.1.1.1.4.1 christos Contributed by Alexander Monakov <amonakov (at) ispras.ru>
3 1.1.1.1.4.1 christos
4 1.1.1.1.4.1 christos This file is part of the GNU Offloading and Multi Processing Library
5 1.1.1.1.4.1 christos (libgomp).
6 1.1.1.1.4.1 christos
7 1.1.1.1.4.1 christos Libgomp is free software; you can redistribute it and/or modify it
8 1.1.1.1.4.1 christos under the terms of the GNU General Public License as published by
9 1.1.1.1.4.1 christos the Free Software Foundation; either version 3, or (at your option)
10 1.1.1.1.4.1 christos any later version.
11 1.1.1.1.4.1 christos
12 1.1.1.1.4.1 christos Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1.1.1.4.1 christos WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1.1.1.4.1 christos FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1.1.1.4.1 christos more details.
16 1.1.1.1.4.1 christos
17 1.1.1.1.4.1 christos Under Section 7 of GPL version 3, you are granted additional
18 1.1.1.1.4.1 christos permissions described in the GCC Runtime Library Exception, version
19 1.1.1.1.4.1 christos 3.1, as published by the Free Software Foundation.
20 1.1.1.1.4.1 christos
21 1.1.1.1.4.1 christos You should have received a copy of the GNU General Public License and
22 1.1.1.1.4.1 christos a copy of the GCC Runtime Library Exception along with this program;
23 1.1.1.1.4.1 christos see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1.1.1.4.1 christos <http://www.gnu.org/licenses/>. */
25 1.1.1.1.4.1 christos
26 1.1.1.1.4.1 christos /* This file handles maintenance of threads on NVPTX. */
27 1.1.1.1.4.1 christos
28 1.1.1.1.4.1 christos #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
29 1.1.1.1.4.1 christos
30 1.1.1.1.4.1 christos #include "libgomp.h"
31 1.1.1.1.4.1 christos #include <stdlib.h>
32 1.1.1.1.4.1 christos #include <string.h>
33 1.1.1.1.4.1 christos
/* Pointer to the array of gomp_thread descriptors, one entry per thread.
   The thread with tid 0 in gomp_nvptx_main allocates and zeroes the array
   and stores its address here; the remaining threads read it only after the
   bar.sync in that function.  gomp_thread_start also tests it as its loop
   exit condition.  */
34 1.1.1.1.4.1 christos struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
35 1.1.1.1.4.1 christos
/* Idle loop run by the non-master threads.  Declared ahead of its
   definition because gomp_nvptx_main calls it.  */
36 1.1.1.1.4.1 christos static void gomp_thread_start (struct gomp_thread_pool *);
37 1.1.1.1.4.1 christos
38 1.1.1.1.4.1 christos
39 1.1.1.1.4.1 christos /* This externally visible function handles target region entry. It
40 1.1.1.1.4.1 christos sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
41 1.1.1.1.4.1 christos in the master thread or gomp_thread_start in other threads.
42 1.1.1.1.4.1 christos
43 1.1.1.1.4.1 christos The name of this function is part of the interface with the compiler: for
44 1.1.1.1.4.1 christos each target region, GCC emits a PTX .kernel function that sets up soft-stack
45 1.1.1.1.4.1 christos and uniform-simt state and calls this function, passing in FN the original
46 1.1.1.1.4.1 christos function outlined for the target region. */
47 1.1.1.1.4.1 christos
48 1.1.1.1.4.1 christos void
49 1.1.1.1.4.1 christos gomp_nvptx_main (void (*fn) (void *), void *fn_data)
50 1.1.1.1.4.1 christos {
51 1.1.1.1.4.1 christos int tid, ntids;
52 1.1.1.1.4.1 christos asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
53 1.1.1.1.4.1 christos asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
54 1.1.1.1.4.1 christos if (tid == 0)
55 1.1.1.1.4.1 christos {
56 1.1.1.1.4.1 christos gomp_global_icv.nthreads_var = ntids;
57 1.1.1.1.4.1 christos /* Starting additional threads is not supported. */
58 1.1.1.1.4.1 christos gomp_global_icv.dyn_var = true;
59 1.1.1.1.4.1 christos
60 1.1.1.1.4.1 christos nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
61 1.1.1.1.4.1 christos memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
62 1.1.1.1.4.1 christos
63 1.1.1.1.4.1 christos struct gomp_thread_pool *pool = alloca (sizeof (*pool));
64 1.1.1.1.4.1 christos pool->threads = alloca (ntids * sizeof (*pool->threads));
65 1.1.1.1.4.1 christos for (tid = 0; tid < ntids; tid++)
66 1.1.1.1.4.1 christos pool->threads[tid] = nvptx_thrs + tid;
67 1.1.1.1.4.1 christos pool->threads_size = ntids;
68 1.1.1.1.4.1 christos pool->threads_used = ntids;
69 1.1.1.1.4.1 christos pool->threads_busy = 1;
70 1.1.1.1.4.1 christos pool->last_team = NULL;
71 1.1.1.1.4.1 christos gomp_simple_barrier_init (&pool->threads_dock, ntids);
72 1.1.1.1.4.1 christos
73 1.1.1.1.4.1 christos nvptx_thrs[0].thread_pool = pool;
74 1.1.1.1.4.1 christos asm ("bar.sync 0;");
75 1.1.1.1.4.1 christos fn (fn_data);
76 1.1.1.1.4.1 christos
77 1.1.1.1.4.1 christos gomp_free_thread (nvptx_thrs);
78 1.1.1.1.4.1 christos }
79 1.1.1.1.4.1 christos else
80 1.1.1.1.4.1 christos {
81 1.1.1.1.4.1 christos asm ("bar.sync 0;");
82 1.1.1.1.4.1 christos gomp_thread_start (nvptx_thrs[0].thread_pool);
83 1.1.1.1.4.1 christos }
84 1.1.1.1.4.1 christos }
85 1.1.1.1.4.1 christos
86 1.1.1.1.4.1 christos /* This function contains the idle loop in which a thread waits
87 1.1.1.1.4.1 christos to be called up to become part of a team. */
88 1.1.1.1.4.1 christos
89 1.1.1.1.4.1 christos static void
90 1.1.1.1.4.1 christos gomp_thread_start (struct gomp_thread_pool *pool)
91 1.1.1.1.4.1 christos {
92 1.1.1.1.4.1 christos struct gomp_thread *thr = gomp_thread ();
93 1.1.1.1.4.1 christos
94 1.1.1.1.4.1 christos gomp_sem_init (&thr->release, 0);
95 1.1.1.1.4.1 christos thr->thread_pool = pool;
96 1.1.1.1.4.1 christos
97 1.1.1.1.4.1 christos do
98 1.1.1.1.4.1 christos {
99 1.1.1.1.4.1 christos gomp_simple_barrier_wait (&pool->threads_dock);
100 1.1.1.1.4.1 christos if (!thr->fn)
101 1.1.1.1.4.1 christos continue;
102 1.1.1.1.4.1 christos thr->fn (thr->data);
103 1.1.1.1.4.1 christos thr->fn = NULL;
104 1.1.1.1.4.1 christos
105 1.1.1.1.4.1 christos struct gomp_task *task = thr->task;
106 1.1.1.1.4.1 christos gomp_team_barrier_wait_final (&thr->ts.team->barrier);
107 1.1.1.1.4.1 christos gomp_finish_task (task);
108 1.1.1.1.4.1 christos }
109 1.1.1.1.4.1 christos /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
110 1.1.1.1.4.1 christos it can trash stack pointer R1 in loops lacking exit edges. Add a cheap
111 1.1.1.1.4.1 christos artificial exit that the driver would not be able to optimize out. */
112 1.1.1.1.4.1 christos while (nvptx_thrs);
113 1.1.1.1.4.1 christos }
114 1.1.1.1.4.1 christos
115 1.1.1.1.4.1 christos /* Launch a team. */
116 1.1.1.1.4.1 christos
117 1.1.1.1.4.1 christos void
118 1.1.1.1.4.1 christos gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
119 1.1.1.1.4.1 christos unsigned flags, struct gomp_team *team)
120 1.1.1.1.4.1 christos {
121 1.1.1.1.4.1 christos struct gomp_thread *thr, *nthr;
122 1.1.1.1.4.1 christos struct gomp_task *task;
123 1.1.1.1.4.1 christos struct gomp_task_icv *icv;
124 1.1.1.1.4.1 christos struct gomp_thread_pool *pool;
125 1.1.1.1.4.1 christos unsigned long nthreads_var;
126 1.1.1.1.4.1 christos
127 1.1.1.1.4.1 christos thr = gomp_thread ();
128 1.1.1.1.4.1 christos pool = thr->thread_pool;
129 1.1.1.1.4.1 christos task = thr->task;
130 1.1.1.1.4.1 christos icv = task ? &task->icv : &gomp_global_icv;
131 1.1.1.1.4.1 christos
132 1.1.1.1.4.1 christos /* Always save the previous state, even if this isn't a nested team.
133 1.1.1.1.4.1 christos In particular, we should save any work share state from an outer
134 1.1.1.1.4.1 christos orphaned work share construct. */
135 1.1.1.1.4.1 christos team->prev_ts = thr->ts;
136 1.1.1.1.4.1 christos
137 1.1.1.1.4.1 christos thr->ts.team = team;
138 1.1.1.1.4.1 christos thr->ts.team_id = 0;
139 1.1.1.1.4.1 christos ++thr->ts.level;
140 1.1.1.1.4.1 christos if (nthreads > 1)
141 1.1.1.1.4.1 christos ++thr->ts.active_level;
142 1.1.1.1.4.1 christos thr->ts.work_share = &team->work_shares[0];
143 1.1.1.1.4.1 christos thr->ts.last_work_share = NULL;
144 1.1.1.1.4.1 christos thr->ts.single_count = 0;
145 1.1.1.1.4.1 christos thr->ts.static_trip = 0;
146 1.1.1.1.4.1 christos thr->task = &team->implicit_task[0];
147 1.1.1.1.4.1 christos nthreads_var = icv->nthreads_var;
148 1.1.1.1.4.1 christos gomp_init_task (thr->task, task, icv);
149 1.1.1.1.4.1 christos team->implicit_task[0].icv.nthreads_var = nthreads_var;
150 1.1.1.1.4.1 christos
151 1.1.1.1.4.1 christos if (nthreads == 1)
152 1.1.1.1.4.1 christos return;
153 1.1.1.1.4.1 christos
154 1.1.1.1.4.1 christos /* Release existing idle threads. */
155 1.1.1.1.4.1 christos for (unsigned i = 1; i < nthreads; ++i)
156 1.1.1.1.4.1 christos {
157 1.1.1.1.4.1 christos nthr = pool->threads[i];
158 1.1.1.1.4.1 christos nthr->ts.team = team;
159 1.1.1.1.4.1 christos nthr->ts.work_share = &team->work_shares[0];
160 1.1.1.1.4.1 christos nthr->ts.last_work_share = NULL;
161 1.1.1.1.4.1 christos nthr->ts.team_id = i;
162 1.1.1.1.4.1 christos nthr->ts.level = team->prev_ts.level + 1;
163 1.1.1.1.4.1 christos nthr->ts.active_level = thr->ts.active_level;
164 1.1.1.1.4.1 christos nthr->ts.single_count = 0;
165 1.1.1.1.4.1 christos nthr->ts.static_trip = 0;
166 1.1.1.1.4.1 christos nthr->task = &team->implicit_task[i];
167 1.1.1.1.4.1 christos gomp_init_task (nthr->task, task, icv);
168 1.1.1.1.4.1 christos team->implicit_task[i].icv.nthreads_var = nthreads_var;
169 1.1.1.1.4.1 christos nthr->fn = fn;
170 1.1.1.1.4.1 christos nthr->data = data;
171 1.1.1.1.4.1 christos team->ordered_release[i] = &nthr->release;
172 1.1.1.1.4.1 christos }
173 1.1.1.1.4.1 christos
174 1.1.1.1.4.1 christos gomp_simple_barrier_wait (&pool->threads_dock);
175 1.1.1.1.4.1 christos }
176 1.1.1.1.4.1 christos
177 1.1.1.1.4.1 christos #include "../../team.c"
178 1.1.1.1.4.1 christos #endif
179