team.c revision 1.12 1 1.12 mrg /* Copyright (C) 2005-2022 Free Software Foundation, Inc.
2 1.1 mrg Contributed by Richard Henderson <rth (at) redhat.com>.
3 1.1 mrg
4 1.5 mrg This file is part of the GNU Offloading and Multi Processing Library
5 1.5 mrg (libgomp).
6 1.1 mrg
7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 1.1 mrg more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
26 1.11 mrg /* This file handles the maintenance of threads in response to team
27 1.1 mrg creation and termination. */
28 1.1 mrg
29 1.1 mrg #include "libgomp.h"
30 1.6 mrg #include "pool.h"
31 1.1 mrg #include <stdlib.h>
32 1.1 mrg #include <string.h>
33 1.1 mrg
34 1.8 mrg #ifdef LIBGOMP_USE_PTHREADS
/* Thread attributes that gomp_team_start hands to pthread_create unless
   it has to build a private copy for affinity purposes.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* The libgomp per-thread data structure.  With native or emulated TLS it
   is a thread-local variable; otherwise it is reached through a key whose
   value gomp_thread_start installs with pthread_setspecific.  */
#if defined (HAVE_TLS) || defined (USE_EMUTLS)
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
47 1.1 mrg
48 1.1 mrg
49 1.1 mrg /* This structure is used to communicate across pthread_create. */
50 1.1 mrg
51 1.1 mrg struct gomp_thread_start_data
52 1.1 mrg {
53 1.1 mrg void (*fn) (void *);
54 1.1 mrg void *fn_data;
55 1.1 mrg struct gomp_team_state ts;
56 1.1 mrg struct gomp_task *task;
57 1.1 mrg struct gomp_thread_pool *thread_pool;
58 1.5 mrg unsigned int place;
59 1.12 mrg unsigned int num_teams;
60 1.12 mrg unsigned int team_num;
61 1.1 mrg bool nested;
62 1.10 mrg pthread_t handle;
63 1.1 mrg };
64 1.1 mrg
65 1.1 mrg
66 1.1 mrg /* This function is a pthread_create entry point. This contains the idle
67 1.1 mrg loop in which a thread waits to be called up to become part of a team. */
68 1.1 mrg
69 1.1 mrg static void *
70 1.1 mrg gomp_thread_start (void *xdata)
71 1.1 mrg {
72 1.1 mrg struct gomp_thread_start_data *data = xdata;
73 1.1 mrg struct gomp_thread *thr;
74 1.1 mrg struct gomp_thread_pool *pool;
75 1.1 mrg void (*local_fn) (void *);
76 1.1 mrg void *local_data;
77 1.1 mrg
78 1.5 mrg #if defined HAVE_TLS || defined USE_EMUTLS
79 1.1 mrg thr = &gomp_tls_data;
80 1.1 mrg #else
81 1.1 mrg struct gomp_thread local_thr;
82 1.1 mrg thr = &local_thr;
83 1.1 mrg #endif
84 1.1 mrg gomp_sem_init (&thr->release, 0);
85 1.1 mrg
86 1.1 mrg /* Extract what we need from data. */
87 1.1 mrg local_fn = data->fn;
88 1.1 mrg local_data = data->fn_data;
89 1.1 mrg thr->thread_pool = data->thread_pool;
90 1.1 mrg thr->ts = data->ts;
91 1.1 mrg thr->task = data->task;
92 1.5 mrg thr->place = data->place;
93 1.12 mrg thr->num_teams = data->num_teams;
94 1.12 mrg thr->team_num = data->team_num;
95 1.10 mrg #ifdef GOMP_NEEDS_THREAD_HANDLE
96 1.10 mrg thr->handle = data->handle;
97 1.10 mrg #endif
98 1.12 mrg #if !(defined HAVE_TLS || defined USE_EMUTLS)
99 1.12 mrg pthread_setspecific (gomp_tls_key, thr);
100 1.12 mrg #endif
101 1.1 mrg
102 1.1 mrg thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
103 1.1 mrg
104 1.1 mrg /* Make thread pool local. */
105 1.1 mrg pool = thr->thread_pool;
106 1.1 mrg
107 1.1 mrg if (data->nested)
108 1.1 mrg {
109 1.1 mrg struct gomp_team *team = thr->ts.team;
110 1.1 mrg struct gomp_task *task = thr->task;
111 1.1 mrg
112 1.1 mrg gomp_barrier_wait (&team->barrier);
113 1.1 mrg
114 1.1 mrg local_fn (local_data);
115 1.5 mrg gomp_team_barrier_wait_final (&team->barrier);
116 1.1 mrg gomp_finish_task (task);
117 1.1 mrg gomp_barrier_wait_last (&team->barrier);
118 1.1 mrg }
119 1.1 mrg else
120 1.1 mrg {
121 1.1 mrg pool->threads[thr->ts.team_id] = thr;
122 1.1 mrg
123 1.8 mrg gomp_simple_barrier_wait (&pool->threads_dock);
124 1.1 mrg do
125 1.1 mrg {
126 1.1 mrg struct gomp_team *team = thr->ts.team;
127 1.1 mrg struct gomp_task *task = thr->task;
128 1.1 mrg
129 1.1 mrg local_fn (local_data);
130 1.5 mrg gomp_team_barrier_wait_final (&team->barrier);
131 1.1 mrg gomp_finish_task (task);
132 1.1 mrg
133 1.8 mrg gomp_simple_barrier_wait (&pool->threads_dock);
134 1.1 mrg
135 1.1 mrg local_fn = thr->fn;
136 1.1 mrg local_data = thr->data;
137 1.1 mrg thr->fn = NULL;
138 1.1 mrg }
139 1.1 mrg while (local_fn);
140 1.1 mrg }
141 1.1 mrg
142 1.1 mrg gomp_sem_destroy (&thr->release);
143 1.10 mrg pthread_detach (pthread_self ());
144 1.5 mrg thr->thread_pool = NULL;
145 1.5 mrg thr->task = NULL;
146 1.1 mrg return NULL;
147 1.1 mrg }
148 1.8 mrg #endif
149 1.1 mrg
150 1.6 mrg static inline struct gomp_team *
151 1.6 mrg get_last_team (unsigned nthreads)
152 1.6 mrg {
153 1.6 mrg struct gomp_thread *thr = gomp_thread ();
154 1.6 mrg if (thr->ts.team == NULL)
155 1.6 mrg {
156 1.6 mrg struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
157 1.6 mrg struct gomp_team *last_team = pool->last_team;
158 1.6 mrg if (last_team != NULL && last_team->nthreads == nthreads)
159 1.6 mrg {
160 1.6 mrg pool->last_team = NULL;
161 1.6 mrg return last_team;
162 1.6 mrg }
163 1.6 mrg }
164 1.6 mrg return NULL;
165 1.6 mrg }
166 1.1 mrg
167 1.1 mrg /* Create a new team data structure. */
168 1.1 mrg
169 1.1 mrg struct gomp_team *
170 1.1 mrg gomp_new_team (unsigned nthreads)
171 1.1 mrg {
172 1.1 mrg struct gomp_team *team;
173 1.1 mrg int i;
174 1.1 mrg
175 1.6 mrg team = get_last_team (nthreads);
176 1.6 mrg if (team == NULL)
177 1.6 mrg {
178 1.6 mrg size_t extra = sizeof (team->ordered_release[0])
179 1.6 mrg + sizeof (team->implicit_task[0]);
180 1.12 mrg #ifdef GOMP_USE_ALIGNED_WORK_SHARES
181 1.12 mrg team = gomp_aligned_alloc (__alignof (struct gomp_team),
182 1.12 mrg sizeof (*team) + nthreads * extra);
183 1.12 mrg #else
184 1.11 mrg team = team_malloc (sizeof (*team) + nthreads * extra);
185 1.12 mrg #endif
186 1.6 mrg
187 1.6 mrg #ifndef HAVE_SYNC_BUILTINS
188 1.6 mrg gomp_mutex_init (&team->work_share_list_free_lock);
189 1.6 mrg #endif
190 1.6 mrg gomp_barrier_init (&team->barrier, nthreads);
191 1.6 mrg gomp_mutex_init (&team->task_lock);
192 1.6 mrg
193 1.6 mrg team->nthreads = nthreads;
194 1.6 mrg }
195 1.1 mrg
196 1.1 mrg team->work_share_chunk = 8;
197 1.1 mrg #ifdef HAVE_SYNC_BUILTINS
198 1.1 mrg team->single_count = 0;
199 1.1 mrg #endif
200 1.5 mrg team->work_shares_to_free = &team->work_shares[0];
201 1.10 mrg gomp_init_work_share (&team->work_shares[0], 0, nthreads);
202 1.1 mrg team->work_shares[0].next_alloc = NULL;
203 1.1 mrg team->work_share_list_free = NULL;
204 1.1 mrg team->work_share_list_alloc = &team->work_shares[1];
205 1.1 mrg for (i = 1; i < 7; i++)
206 1.1 mrg team->work_shares[i].next_free = &team->work_shares[i + 1];
207 1.1 mrg team->work_shares[i].next_free = NULL;
208 1.1 mrg
209 1.1 mrg gomp_sem_init (&team->master_release, 0);
210 1.1 mrg team->ordered_release = (void *) &team->implicit_task[nthreads];
211 1.1 mrg team->ordered_release[0] = &team->master_release;
212 1.1 mrg
213 1.6 mrg priority_queue_init (&team->task_queue);
214 1.1 mrg team->task_count = 0;
215 1.5 mrg team->task_queued_count = 0;
216 1.1 mrg team->task_running_count = 0;
217 1.5 mrg team->work_share_cancelled = 0;
218 1.5 mrg team->team_cancelled = 0;
219 1.1 mrg
220 1.12 mrg team->task_detach_count = 0;
221 1.12 mrg
222 1.1 mrg return team;
223 1.1 mrg }
224 1.1 mrg
225 1.1 mrg
226 1.1 mrg /* Free a team data structure. */
227 1.1 mrg
228 1.1 mrg static void
229 1.1 mrg free_team (struct gomp_team *team)
230 1.1 mrg {
231 1.6 mrg #ifndef HAVE_SYNC_BUILTINS
232 1.6 mrg gomp_mutex_destroy (&team->work_share_list_free_lock);
233 1.6 mrg #endif
234 1.1 mrg gomp_barrier_destroy (&team->barrier);
235 1.1 mrg gomp_mutex_destroy (&team->task_lock);
236 1.6 mrg priority_queue_free (&team->task_queue);
237 1.11 mrg team_free (team);
238 1.1 mrg }
239 1.1 mrg
240 1.1 mrg static void
241 1.1 mrg gomp_free_pool_helper (void *thread_pool)
242 1.1 mrg {
243 1.5 mrg struct gomp_thread *thr = gomp_thread ();
244 1.1 mrg struct gomp_thread_pool *pool
245 1.1 mrg = (struct gomp_thread_pool *) thread_pool;
246 1.8 mrg gomp_simple_barrier_wait_last (&pool->threads_dock);
247 1.5 mrg gomp_sem_destroy (&thr->release);
248 1.5 mrg thr->thread_pool = NULL;
249 1.5 mrg thr->task = NULL;
250 1.8 mrg #ifdef LIBGOMP_USE_PTHREADS
251 1.10 mrg pthread_detach (pthread_self ());
252 1.1 mrg pthread_exit (NULL);
253 1.8 mrg #elif defined(__nvptx__)
254 1.8 mrg asm ("exit;");
255 1.11 mrg #elif defined(__AMDGCN__)
256 1.11 mrg asm ("s_dcache_wb\n\t"
257 1.11 mrg "s_endpgm");
258 1.8 mrg #else
259 1.8 mrg #error gomp_free_pool_helper must terminate the thread
260 1.8 mrg #endif
261 1.1 mrg }
262 1.1 mrg
263 1.1 mrg /* Free a thread pool and release its threads. */
264 1.1 mrg
265 1.5 mrg void
266 1.1 mrg gomp_free_thread (void *arg __attribute__((unused)))
267 1.1 mrg {
268 1.1 mrg struct gomp_thread *thr = gomp_thread ();
269 1.1 mrg struct gomp_thread_pool *pool = thr->thread_pool;
270 1.1 mrg if (pool)
271 1.1 mrg {
272 1.1 mrg if (pool->threads_used > 0)
273 1.1 mrg {
274 1.1 mrg int i;
275 1.1 mrg for (i = 1; i < pool->threads_used; i++)
276 1.1 mrg {
277 1.1 mrg struct gomp_thread *nthr = pool->threads[i];
278 1.1 mrg nthr->fn = gomp_free_pool_helper;
279 1.1 mrg nthr->data = pool;
280 1.1 mrg }
281 1.1 mrg /* This barrier undocks threads docked on pool->threads_dock. */
282 1.8 mrg gomp_simple_barrier_wait (&pool->threads_dock);
283 1.1 mrg /* And this waits till all threads have called gomp_barrier_wait_last
284 1.1 mrg in gomp_free_pool_helper. */
285 1.8 mrg gomp_simple_barrier_wait (&pool->threads_dock);
286 1.1 mrg /* Now it is safe to destroy the barrier and free the pool. */
287 1.8 mrg gomp_simple_barrier_destroy (&pool->threads_dock);
288 1.3 mrg
289 1.3 mrg #ifdef HAVE_SYNC_BUILTINS
290 1.3 mrg __sync_fetch_and_add (&gomp_managed_threads,
291 1.3 mrg 1L - pool->threads_used);
292 1.3 mrg #else
293 1.5 mrg gomp_mutex_lock (&gomp_managed_threads_lock);
294 1.3 mrg gomp_managed_threads -= pool->threads_used - 1L;
295 1.5 mrg gomp_mutex_unlock (&gomp_managed_threads_lock);
296 1.3 mrg #endif
297 1.1 mrg }
298 1.1 mrg if (pool->last_team)
299 1.1 mrg free_team (pool->last_team);
300 1.8 mrg #ifndef __nvptx__
301 1.11 mrg team_free (pool->threads);
302 1.11 mrg team_free (pool);
303 1.8 mrg #endif
304 1.1 mrg thr->thread_pool = NULL;
305 1.1 mrg }
306 1.6 mrg if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
307 1.6 mrg gomp_team_end ();
308 1.1 mrg if (thr->task != NULL)
309 1.1 mrg {
310 1.1 mrg struct gomp_task *task = thr->task;
311 1.1 mrg gomp_end_task ();
312 1.1 mrg free (task);
313 1.1 mrg }
314 1.1 mrg }
315 1.1 mrg
316 1.1 mrg /* Launch a team. */
317 1.1 mrg
318 1.8 mrg #ifdef LIBGOMP_USE_PTHREADS
319 1.1 mrg void
320 1.1 mrg gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
321 1.10 mrg unsigned flags, struct gomp_team *team,
322 1.10 mrg struct gomp_taskgroup *taskgroup)
323 1.1 mrg {
324 1.12 mrg struct gomp_thread_start_data *start_data = NULL;
325 1.1 mrg struct gomp_thread *thr, *nthr;
326 1.1 mrg struct gomp_task *task;
327 1.1 mrg struct gomp_task_icv *icv;
328 1.1 mrg bool nested;
329 1.1 mrg struct gomp_thread_pool *pool;
330 1.1 mrg unsigned i, n, old_threads_used = 0;
331 1.1 mrg pthread_attr_t thread_attr, *attr;
332 1.3 mrg unsigned long nthreads_var;
333 1.5 mrg char bind, bind_var;
334 1.5 mrg unsigned int s = 0, rest = 0, p = 0, k = 0;
335 1.5 mrg unsigned int affinity_count = 0;
336 1.5 mrg struct gomp_thread **affinity_thr = NULL;
337 1.10 mrg bool force_display = false;
338 1.1 mrg
339 1.1 mrg thr = gomp_thread ();
340 1.6 mrg nested = thr->ts.level;
341 1.1 mrg pool = thr->thread_pool;
342 1.1 mrg task = thr->task;
343 1.1 mrg icv = task ? &task->icv : &gomp_global_icv;
344 1.5 mrg if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
345 1.10 mrg {
346 1.10 mrg gomp_init_affinity ();
347 1.10 mrg if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
348 1.10 mrg gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
349 1.10 mrg thr->place);
350 1.10 mrg }
351 1.1 mrg
352 1.1 mrg /* Always save the previous state, even if this isn't a nested team.
353 1.1 mrg In particular, we should save any work share state from an outer
354 1.1 mrg orphaned work share construct. */
355 1.1 mrg team->prev_ts = thr->ts;
356 1.1 mrg
357 1.1 mrg thr->ts.team = team;
358 1.1 mrg thr->ts.team_id = 0;
359 1.1 mrg ++thr->ts.level;
360 1.1 mrg if (nthreads > 1)
361 1.1 mrg ++thr->ts.active_level;
362 1.1 mrg thr->ts.work_share = &team->work_shares[0];
363 1.1 mrg thr->ts.last_work_share = NULL;
364 1.1 mrg #ifdef HAVE_SYNC_BUILTINS
365 1.1 mrg thr->ts.single_count = 0;
366 1.1 mrg #endif
367 1.1 mrg thr->ts.static_trip = 0;
368 1.1 mrg thr->task = &team->implicit_task[0];
369 1.10 mrg #ifdef GOMP_NEEDS_THREAD_HANDLE
370 1.10 mrg thr->handle = pthread_self ();
371 1.10 mrg #endif
372 1.3 mrg nthreads_var = icv->nthreads_var;
373 1.3 mrg if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
374 1.3 mrg && thr->ts.level < gomp_nthreads_var_list_len)
375 1.3 mrg nthreads_var = gomp_nthreads_var_list[thr->ts.level];
376 1.5 mrg bind_var = icv->bind_var;
377 1.5 mrg if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
378 1.5 mrg bind_var = flags & 7;
379 1.5 mrg bind = bind_var;
380 1.5 mrg if (__builtin_expect (gomp_bind_var_list != NULL, 0)
381 1.5 mrg && thr->ts.level < gomp_bind_var_list_len)
382 1.5 mrg bind_var = gomp_bind_var_list[thr->ts.level];
383 1.1 mrg gomp_init_task (thr->task, task, icv);
384 1.10 mrg thr->task->taskgroup = taskgroup;
385 1.3 mrg team->implicit_task[0].icv.nthreads_var = nthreads_var;
386 1.5 mrg team->implicit_task[0].icv.bind_var = bind_var;
387 1.1 mrg
388 1.1 mrg if (nthreads == 1)
389 1.1 mrg return;
390 1.1 mrg
391 1.1 mrg i = 1;
392 1.1 mrg
393 1.5 mrg if (__builtin_expect (gomp_places_list != NULL, 0))
394 1.5 mrg {
395 1.5 mrg /* Depending on chosen proc_bind model, set subpartition
396 1.5 mrg for the master thread and initialize helper variables
397 1.5 mrg P and optionally S, K and/or REST used by later place
398 1.5 mrg computation for each additional thread. */
399 1.5 mrg p = thr->place - 1;
400 1.5 mrg switch (bind)
401 1.5 mrg {
402 1.5 mrg case omp_proc_bind_true:
403 1.5 mrg case omp_proc_bind_close:
404 1.5 mrg if (nthreads > thr->ts.place_partition_len)
405 1.5 mrg {
406 1.5 mrg /* T > P. S threads will be placed in each place,
407 1.5 mrg and the final REST threads placed one by one
408 1.5 mrg into the already occupied places. */
409 1.5 mrg s = nthreads / thr->ts.place_partition_len;
410 1.5 mrg rest = nthreads % thr->ts.place_partition_len;
411 1.5 mrg }
412 1.5 mrg else
413 1.5 mrg s = 1;
414 1.5 mrg k = 1;
415 1.5 mrg break;
416 1.5 mrg case omp_proc_bind_master:
417 1.5 mrg /* Each thread will be bound to master's place. */
418 1.5 mrg break;
419 1.5 mrg case omp_proc_bind_spread:
420 1.5 mrg if (nthreads <= thr->ts.place_partition_len)
421 1.5 mrg {
422 1.5 mrg /* T <= P. Each subpartition will have in between s
423 1.5 mrg and s+1 places (subpartitions starting at or
424 1.5 mrg after rest will have s places, earlier s+1 places),
425 1.5 mrg each thread will be bound to the first place in
426 1.5 mrg its subpartition (except for the master thread
427 1.5 mrg that can be bound to another place in its
428 1.5 mrg subpartition). */
429 1.5 mrg s = thr->ts.place_partition_len / nthreads;
430 1.5 mrg rest = thr->ts.place_partition_len % nthreads;
431 1.5 mrg rest = (s + 1) * rest + thr->ts.place_partition_off;
432 1.5 mrg if (p < rest)
433 1.5 mrg {
434 1.5 mrg p -= (p - thr->ts.place_partition_off) % (s + 1);
435 1.5 mrg thr->ts.place_partition_len = s + 1;
436 1.5 mrg }
437 1.5 mrg else
438 1.5 mrg {
439 1.5 mrg p -= (p - rest) % s;
440 1.5 mrg thr->ts.place_partition_len = s;
441 1.5 mrg }
442 1.5 mrg thr->ts.place_partition_off = p;
443 1.5 mrg }
444 1.5 mrg else
445 1.5 mrg {
446 1.5 mrg /* T > P. Each subpartition will have just a single
447 1.5 mrg place and we'll place between s and s+1
448 1.5 mrg threads into each subpartition. */
449 1.5 mrg s = nthreads / thr->ts.place_partition_len;
450 1.5 mrg rest = nthreads % thr->ts.place_partition_len;
451 1.5 mrg thr->ts.place_partition_off = p;
452 1.5 mrg thr->ts.place_partition_len = 1;
453 1.5 mrg k = 1;
454 1.5 mrg }
455 1.5 mrg break;
456 1.5 mrg }
457 1.5 mrg }
458 1.5 mrg else
459 1.5 mrg bind = omp_proc_bind_false;
460 1.5 mrg
461 1.1 mrg /* We only allow the reuse of idle threads for non-nested PARALLEL
462 1.1 mrg regions. This appears to be implied by the semantics of
463 1.1 mrg threadprivate variables, but perhaps that's reading too much into
464 1.1 mrg things. Certainly it does prevent any locking problems, since
465 1.1 mrg only the initial program thread will modify gomp_threads. */
466 1.1 mrg if (!nested)
467 1.1 mrg {
468 1.1 mrg old_threads_used = pool->threads_used;
469 1.1 mrg
470 1.1 mrg if (nthreads <= old_threads_used)
471 1.1 mrg n = nthreads;
472 1.1 mrg else if (old_threads_used == 0)
473 1.1 mrg {
474 1.1 mrg n = 0;
475 1.8 mrg gomp_simple_barrier_init (&pool->threads_dock, nthreads);
476 1.1 mrg }
477 1.1 mrg else
478 1.1 mrg {
479 1.1 mrg n = old_threads_used;
480 1.1 mrg
481 1.1 mrg /* Increase the barrier threshold to make sure all new
482 1.1 mrg threads arrive before the team is released. */
483 1.8 mrg gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
484 1.1 mrg }
485 1.1 mrg
486 1.1 mrg /* Not true yet, but soon will be. We're going to release all
487 1.1 mrg threads from the dock, and those that aren't part of the
488 1.1 mrg team will exit. */
489 1.1 mrg pool->threads_used = nthreads;
490 1.1 mrg
491 1.5 mrg /* If necessary, expand the size of the pool->threads array. It is
492 1.5 mrg expected that changes in the number of threads are rare, thus we
493 1.5 mrg make no effort to expand pool->threads_size geometrically. */
494 1.5 mrg if (nthreads >= pool->threads_size)
495 1.5 mrg {
496 1.5 mrg pool->threads_size = nthreads + 1;
497 1.5 mrg pool->threads
498 1.5 mrg = gomp_realloc (pool->threads,
499 1.5 mrg pool->threads_size
500 1.10 mrg * sizeof (struct gomp_thread *));
501 1.10 mrg /* Add current (master) thread to threads[]. */
502 1.10 mrg pool->threads[0] = thr;
503 1.5 mrg }
504 1.5 mrg
505 1.1 mrg /* Release existing idle threads. */
506 1.1 mrg for (; i < n; ++i)
507 1.1 mrg {
508 1.5 mrg unsigned int place_partition_off = thr->ts.place_partition_off;
509 1.5 mrg unsigned int place_partition_len = thr->ts.place_partition_len;
510 1.5 mrg unsigned int place = 0;
511 1.5 mrg if (__builtin_expect (gomp_places_list != NULL, 0))
512 1.5 mrg {
513 1.5 mrg switch (bind)
514 1.5 mrg {
515 1.5 mrg case omp_proc_bind_true:
516 1.5 mrg case omp_proc_bind_close:
517 1.5 mrg if (k == s)
518 1.5 mrg {
519 1.5 mrg ++p;
520 1.5 mrg if (p == (team->prev_ts.place_partition_off
521 1.5 mrg + team->prev_ts.place_partition_len))
522 1.5 mrg p = team->prev_ts.place_partition_off;
523 1.5 mrg k = 1;
524 1.5 mrg if (i == nthreads - rest)
525 1.5 mrg s = 1;
526 1.5 mrg }
527 1.5 mrg else
528 1.5 mrg ++k;
529 1.5 mrg break;
530 1.5 mrg case omp_proc_bind_master:
531 1.5 mrg break;
532 1.5 mrg case omp_proc_bind_spread:
533 1.5 mrg if (k == 0)
534 1.5 mrg {
535 1.5 mrg /* T <= P. */
536 1.5 mrg if (p < rest)
537 1.5 mrg p += s + 1;
538 1.5 mrg else
539 1.5 mrg p += s;
540 1.5 mrg if (p == (team->prev_ts.place_partition_off
541 1.5 mrg + team->prev_ts.place_partition_len))
542 1.5 mrg p = team->prev_ts.place_partition_off;
543 1.5 mrg place_partition_off = p;
544 1.5 mrg if (p < rest)
545 1.5 mrg place_partition_len = s + 1;
546 1.5 mrg else
547 1.5 mrg place_partition_len = s;
548 1.5 mrg }
549 1.5 mrg else
550 1.5 mrg {
551 1.5 mrg /* T > P. */
552 1.5 mrg if (k == s)
553 1.5 mrg {
554 1.5 mrg ++p;
555 1.5 mrg if (p == (team->prev_ts.place_partition_off
556 1.5 mrg + team->prev_ts.place_partition_len))
557 1.5 mrg p = team->prev_ts.place_partition_off;
558 1.5 mrg k = 1;
559 1.5 mrg if (i == nthreads - rest)
560 1.5 mrg s = 1;
561 1.5 mrg }
562 1.5 mrg else
563 1.5 mrg ++k;
564 1.5 mrg place_partition_off = p;
565 1.5 mrg place_partition_len = 1;
566 1.5 mrg }
567 1.5 mrg break;
568 1.5 mrg }
569 1.5 mrg if (affinity_thr != NULL
570 1.5 mrg || (bind != omp_proc_bind_true
571 1.5 mrg && pool->threads[i]->place != p + 1)
572 1.5 mrg || pool->threads[i]->place <= place_partition_off
573 1.5 mrg || pool->threads[i]->place > (place_partition_off
574 1.5 mrg + place_partition_len))
575 1.5 mrg {
576 1.5 mrg unsigned int l;
577 1.10 mrg force_display = true;
578 1.5 mrg if (affinity_thr == NULL)
579 1.5 mrg {
580 1.5 mrg unsigned int j;
581 1.5 mrg
582 1.5 mrg if (team->prev_ts.place_partition_len > 64)
583 1.5 mrg affinity_thr
584 1.5 mrg = gomp_malloc (team->prev_ts.place_partition_len
585 1.5 mrg * sizeof (struct gomp_thread *));
586 1.5 mrg else
587 1.5 mrg affinity_thr
588 1.5 mrg = gomp_alloca (team->prev_ts.place_partition_len
589 1.5 mrg * sizeof (struct gomp_thread *));
590 1.5 mrg memset (affinity_thr, '\0',
591 1.5 mrg team->prev_ts.place_partition_len
592 1.5 mrg * sizeof (struct gomp_thread *));
593 1.5 mrg for (j = i; j < old_threads_used; j++)
594 1.5 mrg {
595 1.5 mrg if (pool->threads[j]->place
596 1.5 mrg > team->prev_ts.place_partition_off
597 1.5 mrg && (pool->threads[j]->place
598 1.5 mrg <= (team->prev_ts.place_partition_off
599 1.5 mrg + team->prev_ts.place_partition_len)))
600 1.5 mrg {
601 1.5 mrg l = pool->threads[j]->place - 1
602 1.5 mrg - team->prev_ts.place_partition_off;
603 1.5 mrg pool->threads[j]->data = affinity_thr[l];
604 1.5 mrg affinity_thr[l] = pool->threads[j];
605 1.5 mrg }
606 1.5 mrg pool->threads[j] = NULL;
607 1.5 mrg }
608 1.5 mrg if (nthreads > old_threads_used)
609 1.5 mrg memset (&pool->threads[old_threads_used],
610 1.5 mrg '\0', ((nthreads - old_threads_used)
611 1.5 mrg * sizeof (struct gomp_thread *)));
612 1.5 mrg n = nthreads;
613 1.5 mrg affinity_count = old_threads_used - i;
614 1.5 mrg }
615 1.5 mrg if (affinity_count == 0)
616 1.5 mrg break;
617 1.5 mrg l = p;
618 1.5 mrg if (affinity_thr[l - team->prev_ts.place_partition_off]
619 1.5 mrg == NULL)
620 1.5 mrg {
621 1.5 mrg if (bind != omp_proc_bind_true)
622 1.5 mrg continue;
623 1.5 mrg for (l = place_partition_off;
624 1.5 mrg l < place_partition_off + place_partition_len;
625 1.5 mrg l++)
626 1.5 mrg if (affinity_thr[l - team->prev_ts.place_partition_off]
627 1.5 mrg != NULL)
628 1.5 mrg break;
629 1.5 mrg if (l == place_partition_off + place_partition_len)
630 1.5 mrg continue;
631 1.5 mrg }
632 1.5 mrg nthr = affinity_thr[l - team->prev_ts.place_partition_off];
633 1.5 mrg affinity_thr[l - team->prev_ts.place_partition_off]
634 1.5 mrg = (struct gomp_thread *) nthr->data;
635 1.5 mrg affinity_count--;
636 1.5 mrg pool->threads[i] = nthr;
637 1.5 mrg }
638 1.5 mrg else
639 1.5 mrg nthr = pool->threads[i];
640 1.5 mrg place = p + 1;
641 1.5 mrg }
642 1.5 mrg else
643 1.5 mrg nthr = pool->threads[i];
644 1.1 mrg nthr->ts.team = team;
645 1.1 mrg nthr->ts.work_share = &team->work_shares[0];
646 1.1 mrg nthr->ts.last_work_share = NULL;
647 1.1 mrg nthr->ts.team_id = i;
648 1.1 mrg nthr->ts.level = team->prev_ts.level + 1;
649 1.1 mrg nthr->ts.active_level = thr->ts.active_level;
650 1.5 mrg nthr->ts.place_partition_off = place_partition_off;
651 1.5 mrg nthr->ts.place_partition_len = place_partition_len;
652 1.12 mrg nthr->ts.def_allocator = thr->ts.def_allocator;
653 1.1 mrg #ifdef HAVE_SYNC_BUILTINS
654 1.1 mrg nthr->ts.single_count = 0;
655 1.1 mrg #endif
656 1.1 mrg nthr->ts.static_trip = 0;
657 1.12 mrg nthr->num_teams = thr->num_teams;
658 1.12 mrg nthr->team_num = thr->team_num;
659 1.1 mrg nthr->task = &team->implicit_task[i];
660 1.5 mrg nthr->place = place;
661 1.1 mrg gomp_init_task (nthr->task, task, icv);
662 1.3 mrg team->implicit_task[i].icv.nthreads_var = nthreads_var;
663 1.5 mrg team->implicit_task[i].icv.bind_var = bind_var;
664 1.10 mrg nthr->task->taskgroup = taskgroup;
665 1.1 mrg nthr->fn = fn;
666 1.1 mrg nthr->data = data;
667 1.1 mrg team->ordered_release[i] = &nthr->release;
668 1.1 mrg }
669 1.1 mrg
670 1.5 mrg if (__builtin_expect (affinity_thr != NULL, 0))
671 1.5 mrg {
672 1.5 mrg /* If AFFINITY_THR is non-NULL just because we had to
673 1.5 mrg permute some threads in the pool, but we've managed
674 1.5 mrg to find exactly as many old threads as we'd find
675 1.5 mrg without affinity, we don't need to handle this
676 1.5 mrg specially anymore. */
677 1.5 mrg if (nthreads <= old_threads_used
678 1.5 mrg ? (affinity_count == old_threads_used - nthreads)
679 1.5 mrg : (i == old_threads_used))
680 1.5 mrg {
681 1.5 mrg if (team->prev_ts.place_partition_len > 64)
682 1.5 mrg free (affinity_thr);
683 1.5 mrg affinity_thr = NULL;
684 1.5 mrg affinity_count = 0;
685 1.5 mrg }
686 1.5 mrg else
687 1.5 mrg {
688 1.5 mrg i = 1;
689 1.5 mrg /* We are going to compute the places/subpartitions
690 1.5 mrg again from the beginning. So, we need to reinitialize
691 1.5 mrg vars modified by the switch (bind) above inside
692 1.5 mrg of the loop, to the state they had after the initial
693 1.5 mrg switch (bind). */
694 1.5 mrg switch (bind)
695 1.5 mrg {
696 1.5 mrg case omp_proc_bind_true:
697 1.5 mrg case omp_proc_bind_close:
698 1.5 mrg if (nthreads > thr->ts.place_partition_len)
699 1.5 mrg /* T > P. S has been changed, so needs
700 1.5 mrg to be recomputed. */
701 1.5 mrg s = nthreads / thr->ts.place_partition_len;
702 1.5 mrg k = 1;
703 1.5 mrg p = thr->place - 1;
704 1.5 mrg break;
705 1.5 mrg case omp_proc_bind_master:
706 1.5 mrg /* No vars have been changed. */
707 1.5 mrg break;
708 1.5 mrg case omp_proc_bind_spread:
709 1.5 mrg p = thr->ts.place_partition_off;
710 1.5 mrg if (k != 0)
711 1.5 mrg {
712 1.5 mrg /* T > P. */
713 1.5 mrg s = nthreads / team->prev_ts.place_partition_len;
714 1.5 mrg k = 1;
715 1.5 mrg }
716 1.5 mrg break;
717 1.5 mrg }
718 1.5 mrg
719 1.5 mrg /* Increase the barrier threshold to make sure all new
720 1.5 mrg threads and all the threads we're going to let die
721 1.5 mrg arrive before the team is released. */
722 1.5 mrg if (affinity_count)
723 1.8 mrg gomp_simple_barrier_reinit (&pool->threads_dock,
724 1.8 mrg nthreads + affinity_count);
725 1.5 mrg }
726 1.5 mrg }
727 1.5 mrg
728 1.1 mrg if (i == nthreads)
729 1.1 mrg goto do_release;
730 1.1 mrg
731 1.1 mrg }
732 1.1 mrg
733 1.5 mrg if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
734 1.1 mrg {
735 1.5 mrg long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
736 1.1 mrg
737 1.1 mrg if (old_threads_used == 0)
738 1.1 mrg --diff;
739 1.1 mrg
740 1.1 mrg #ifdef HAVE_SYNC_BUILTINS
741 1.1 mrg __sync_fetch_and_add (&gomp_managed_threads, diff);
742 1.1 mrg #else
743 1.5 mrg gomp_mutex_lock (&gomp_managed_threads_lock);
744 1.1 mrg gomp_managed_threads += diff;
745 1.5 mrg gomp_mutex_unlock (&gomp_managed_threads_lock);
746 1.1 mrg #endif
747 1.1 mrg }
748 1.1 mrg
749 1.1 mrg attr = &gomp_thread_attr;
750 1.5 mrg if (__builtin_expect (gomp_places_list != NULL, 0))
751 1.1 mrg {
752 1.1 mrg size_t stacksize;
753 1.1 mrg pthread_attr_init (&thread_attr);
754 1.1 mrg if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
755 1.1 mrg pthread_attr_setstacksize (&thread_attr, stacksize);
756 1.1 mrg attr = &thread_attr;
757 1.1 mrg }
758 1.1 mrg
759 1.1 mrg start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
760 1.10 mrg * (nthreads - i));
761 1.1 mrg
762 1.1 mrg /* Launch new threads. */
763 1.5 mrg for (; i < nthreads; ++i)
764 1.1 mrg {
765 1.1 mrg int err;
766 1.1 mrg
767 1.5 mrg start_data->ts.place_partition_off = thr->ts.place_partition_off;
768 1.5 mrg start_data->ts.place_partition_len = thr->ts.place_partition_len;
769 1.5 mrg start_data->place = 0;
770 1.5 mrg if (__builtin_expect (gomp_places_list != NULL, 0))
771 1.5 mrg {
772 1.5 mrg switch (bind)
773 1.5 mrg {
774 1.5 mrg case omp_proc_bind_true:
775 1.5 mrg case omp_proc_bind_close:
776 1.5 mrg if (k == s)
777 1.5 mrg {
778 1.5 mrg ++p;
779 1.5 mrg if (p == (team->prev_ts.place_partition_off
780 1.5 mrg + team->prev_ts.place_partition_len))
781 1.5 mrg p = team->prev_ts.place_partition_off;
782 1.5 mrg k = 1;
783 1.5 mrg if (i == nthreads - rest)
784 1.5 mrg s = 1;
785 1.5 mrg }
786 1.5 mrg else
787 1.5 mrg ++k;
788 1.5 mrg break;
789 1.5 mrg case omp_proc_bind_master:
790 1.5 mrg break;
791 1.5 mrg case omp_proc_bind_spread:
792 1.5 mrg if (k == 0)
793 1.5 mrg {
794 1.5 mrg /* T <= P. */
795 1.5 mrg if (p < rest)
796 1.5 mrg p += s + 1;
797 1.5 mrg else
798 1.5 mrg p += s;
799 1.5 mrg if (p == (team->prev_ts.place_partition_off
800 1.5 mrg + team->prev_ts.place_partition_len))
801 1.5 mrg p = team->prev_ts.place_partition_off;
802 1.5 mrg start_data->ts.place_partition_off = p;
803 1.5 mrg if (p < rest)
804 1.5 mrg start_data->ts.place_partition_len = s + 1;
805 1.5 mrg else
806 1.5 mrg start_data->ts.place_partition_len = s;
807 1.5 mrg }
808 1.5 mrg else
809 1.5 mrg {
810 1.5 mrg /* T > P. */
811 1.5 mrg if (k == s)
812 1.5 mrg {
813 1.5 mrg ++p;
814 1.5 mrg if (p == (team->prev_ts.place_partition_off
815 1.5 mrg + team->prev_ts.place_partition_len))
816 1.5 mrg p = team->prev_ts.place_partition_off;
817 1.5 mrg k = 1;
818 1.5 mrg if (i == nthreads - rest)
819 1.5 mrg s = 1;
820 1.5 mrg }
821 1.5 mrg else
822 1.5 mrg ++k;
823 1.5 mrg start_data->ts.place_partition_off = p;
824 1.5 mrg start_data->ts.place_partition_len = 1;
825 1.5 mrg }
826 1.5 mrg break;
827 1.5 mrg }
828 1.5 mrg start_data->place = p + 1;
829 1.5 mrg if (affinity_thr != NULL && pool->threads[i] != NULL)
830 1.5 mrg continue;
831 1.5 mrg gomp_init_thread_affinity (attr, p);
832 1.5 mrg }
833 1.5 mrg
834 1.1 mrg start_data->fn = fn;
835 1.1 mrg start_data->fn_data = data;
836 1.1 mrg start_data->ts.team = team;
837 1.1 mrg start_data->ts.work_share = &team->work_shares[0];
838 1.1 mrg start_data->ts.last_work_share = NULL;
839 1.1 mrg start_data->ts.team_id = i;
840 1.1 mrg start_data->ts.level = team->prev_ts.level + 1;
841 1.1 mrg start_data->ts.active_level = thr->ts.active_level;
842 1.12 mrg start_data->ts.def_allocator = thr->ts.def_allocator;
843 1.1 mrg #ifdef HAVE_SYNC_BUILTINS
844 1.1 mrg start_data->ts.single_count = 0;
845 1.1 mrg #endif
846 1.1 mrg start_data->ts.static_trip = 0;
847 1.12 mrg start_data->num_teams = thr->num_teams;
848 1.12 mrg start_data->team_num = thr->team_num;
849 1.1 mrg start_data->task = &team->implicit_task[i];
850 1.1 mrg gomp_init_task (start_data->task, task, icv);
851 1.3 mrg team->implicit_task[i].icv.nthreads_var = nthreads_var;
852 1.5 mrg team->implicit_task[i].icv.bind_var = bind_var;
853 1.10 mrg start_data->task->taskgroup = taskgroup;
854 1.1 mrg start_data->thread_pool = pool;
855 1.1 mrg start_data->nested = nested;
856 1.1 mrg
857 1.6 mrg attr = gomp_adjust_thread_attr (attr, &thread_attr);
858 1.10 mrg err = pthread_create (&start_data->handle, attr, gomp_thread_start,
859 1.10 mrg start_data);
860 1.10 mrg start_data++;
861 1.1 mrg if (err != 0)
862 1.1 mrg gomp_fatal ("Thread creation failed: %s", strerror (err));
863 1.1 mrg }
864 1.1 mrg
865 1.6 mrg if (__builtin_expect (attr == &thread_attr, 0))
866 1.1 mrg pthread_attr_destroy (&thread_attr);
867 1.1 mrg
868 1.1 mrg do_release:
869 1.8 mrg if (nested)
870 1.8 mrg gomp_barrier_wait (&team->barrier);
871 1.8 mrg else
872 1.8 mrg gomp_simple_barrier_wait (&pool->threads_dock);
873 1.1 mrg
874 1.1 mrg /* Decrease the barrier threshold to match the number of threads
875 1.1 mrg that should arrive back at the end of this team. The extra
876 1.1 mrg threads should be exiting. Note that we arrange for this test
877 1.5 mrg to never be true for nested teams. If AFFINITY_COUNT is non-zero,
878 1.5 mrg the barrier as well as gomp_managed_threads was temporarily
879 1.5 mrg set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
880 1.5 mrg AFFINITY_COUNT if non-zero will be always at least
881 1.5 mrg OLD_THREADS_COUNT - NTHREADS. */
882 1.5 mrg if (__builtin_expect (nthreads < old_threads_used, 0)
883 1.5 mrg || __builtin_expect (affinity_count, 0))
884 1.1 mrg {
885 1.1 mrg long diff = (long) nthreads - (long) old_threads_used;
886 1.1 mrg
887 1.5 mrg if (affinity_count)
888 1.5 mrg diff = -affinity_count;
889 1.5 mrg
890 1.8 mrg gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
891 1.1 mrg
892 1.1 mrg #ifdef HAVE_SYNC_BUILTINS
893 1.1 mrg __sync_fetch_and_add (&gomp_managed_threads, diff);
894 1.1 mrg #else
895 1.5 mrg gomp_mutex_lock (&gomp_managed_threads_lock);
896 1.1 mrg gomp_managed_threads += diff;
897 1.5 mrg gomp_mutex_unlock (&gomp_managed_threads_lock);
898 1.1 mrg #endif
899 1.1 mrg }
900 1.10 mrg if (__builtin_expect (gomp_display_affinity_var, 0))
901 1.10 mrg {
902 1.10 mrg if (nested
903 1.10 mrg || nthreads != old_threads_used
904 1.10 mrg || force_display)
905 1.10 mrg {
906 1.10 mrg gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
907 1.10 mrg thr->place);
908 1.10 mrg if (nested)
909 1.10 mrg {
910 1.10 mrg start_data -= nthreads - 1;
911 1.10 mrg for (i = 1; i < nthreads; ++i)
912 1.10 mrg {
913 1.10 mrg gomp_display_affinity_thread (
914 1.10 mrg #ifdef LIBGOMP_USE_PTHREADS
915 1.10 mrg start_data->handle,
916 1.10 mrg #else
917 1.10 mrg gomp_thread_self (),
918 1.10 mrg #endif
919 1.10 mrg &start_data->ts,
920 1.10 mrg start_data->place);
921 1.10 mrg start_data++;
922 1.10 mrg }
923 1.10 mrg }
924 1.10 mrg else
925 1.10 mrg {
926 1.10 mrg for (i = 1; i < nthreads; ++i)
927 1.10 mrg {
928 1.10 mrg gomp_thread_handle handle
929 1.10 mrg = gomp_thread_to_pthread_t (pool->threads[i]);
930 1.10 mrg gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
931 1.10 mrg pool->threads[i]->place);
932 1.10 mrg }
933 1.10 mrg }
934 1.10 mrg }
935 1.10 mrg }
936 1.5 mrg if (__builtin_expect (affinity_thr != NULL, 0)
937 1.5 mrg && team->prev_ts.place_partition_len > 64)
938 1.5 mrg free (affinity_thr);
939 1.1 mrg }
940 1.8 mrg #endif
941 1.1 mrg
942 1.1 mrg
943 1.1 mrg /* Terminate the current team. This is only to be called by the master
944 1.1 mrg thread. We assume that we must wait for the other threads. */
945 1.1 mrg
946 1.1 mrg void
947 1.1 mrg gomp_team_end (void)
948 1.1 mrg {
949 1.1 mrg struct gomp_thread *thr = gomp_thread ();
950 1.1 mrg struct gomp_team *team = thr->ts.team;
951 1.1 mrg
952 1.5 mrg /* This barrier handles all pending explicit threads.
953 1.5 mrg As #pragma omp cancel parallel might get awaited count in
954 1.5 mrg team->barrier in a inconsistent state, we need to use a different
955 1.5 mrg counter here. */
956 1.5 mrg gomp_team_barrier_wait_final (&team->barrier);
957 1.5 mrg if (__builtin_expect (team->team_cancelled, 0))
958 1.5 mrg {
959 1.5 mrg struct gomp_work_share *ws = team->work_shares_to_free;
960 1.5 mrg do
961 1.5 mrg {
962 1.5 mrg struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
963 1.5 mrg if (next_ws == NULL)
964 1.5 mrg gomp_ptrlock_set (&ws->next_ws, ws);
965 1.5 mrg gomp_fini_work_share (ws);
966 1.5 mrg ws = next_ws;
967 1.5 mrg }
968 1.5 mrg while (ws != NULL);
969 1.5 mrg }
970 1.5 mrg else
971 1.5 mrg gomp_fini_work_share (thr->ts.work_share);
972 1.1 mrg
973 1.1 mrg gomp_end_task ();
974 1.1 mrg thr->ts = team->prev_ts;
975 1.1 mrg
976 1.10 mrg if (__builtin_expect (thr->ts.level != 0, 0))
977 1.1 mrg {
978 1.1 mrg #ifdef HAVE_SYNC_BUILTINS
979 1.1 mrg __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
980 1.1 mrg #else
981 1.5 mrg gomp_mutex_lock (&gomp_managed_threads_lock);
982 1.1 mrg gomp_managed_threads -= team->nthreads - 1L;
983 1.5 mrg gomp_mutex_unlock (&gomp_managed_threads_lock);
984 1.1 mrg #endif
985 1.1 mrg /* This barrier has gomp_barrier_wait_last counterparts
986 1.1 mrg and ensures the team can be safely destroyed. */
987 1.1 mrg gomp_barrier_wait (&team->barrier);
988 1.1 mrg }
989 1.1 mrg
990 1.1 mrg if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
991 1.1 mrg {
992 1.1 mrg struct gomp_work_share *ws = team->work_shares[0].next_alloc;
993 1.1 mrg do
994 1.1 mrg {
995 1.1 mrg struct gomp_work_share *next_ws = ws->next_alloc;
996 1.1 mrg free (ws);
997 1.1 mrg ws = next_ws;
998 1.1 mrg }
999 1.1 mrg while (ws != NULL);
1000 1.1 mrg }
1001 1.1 mrg gomp_sem_destroy (&team->master_release);
1002 1.1 mrg
1003 1.1 mrg if (__builtin_expect (thr->ts.team != NULL, 0)
1004 1.1 mrg || __builtin_expect (team->nthreads == 1, 0))
1005 1.1 mrg free_team (team);
1006 1.1 mrg else
1007 1.1 mrg {
1008 1.1 mrg struct gomp_thread_pool *pool = thr->thread_pool;
1009 1.1 mrg if (pool->last_team)
1010 1.1 mrg free_team (pool->last_team);
1011 1.1 mrg pool->last_team = team;
1012 1.6 mrg gomp_release_thread_pool (pool);
1013 1.1 mrg }
1014 1.1 mrg }
1015 1.1 mrg
1016 1.8 mrg #ifdef LIBGOMP_USE_PTHREADS
1017 1.1 mrg
1018 1.1 mrg /* Constructors for this file. */
1019 1.1 mrg
1020 1.1 mrg static void __attribute__((constructor))
1021 1.1 mrg initialize_team (void)
1022 1.1 mrg {
1023 1.5 mrg #if !defined HAVE_TLS && !defined USE_EMUTLS
1024 1.1 mrg static struct gomp_thread initial_thread_tls_data;
1025 1.1 mrg
1026 1.1 mrg pthread_key_create (&gomp_tls_key, NULL);
1027 1.1 mrg pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
1028 1.1 mrg #endif
1029 1.1 mrg
1030 1.1 mrg if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
1031 1.1 mrg gomp_fatal ("could not create thread pool destructor.");
1032 1.1 mrg }
1033 1.1 mrg
1034 1.1 mrg static void __attribute__((destructor))
1035 1.1 mrg team_destructor (void)
1036 1.1 mrg {
1037 1.1 mrg /* Without this dlclose on libgomp could lead to subsequent
1038 1.1 mrg crashes. */
1039 1.1 mrg pthread_key_delete (gomp_thread_destructor);
1040 1.1 mrg }
1041 1.10 mrg
1042 1.10 mrg /* Similar to gomp_free_pool_helper, but don't detach itself,
1043 1.10 mrg gomp_pause_host will pthread_join those threads. */
1044 1.10 mrg
1045 1.10 mrg static void
1046 1.10 mrg gomp_pause_pool_helper (void *thread_pool)
1047 1.10 mrg {
1048 1.10 mrg struct gomp_thread *thr = gomp_thread ();
1049 1.10 mrg struct gomp_thread_pool *pool
1050 1.10 mrg = (struct gomp_thread_pool *) thread_pool;
1051 1.10 mrg gomp_simple_barrier_wait_last (&pool->threads_dock);
1052 1.10 mrg gomp_sem_destroy (&thr->release);
1053 1.10 mrg thr->thread_pool = NULL;
1054 1.10 mrg thr->task = NULL;
1055 1.10 mrg pthread_exit (NULL);
1056 1.10 mrg }
1057 1.10 mrg
1058 1.10 mrg /* Free a thread pool and release its threads. Return non-zero on
1059 1.10 mrg failure. */
1060 1.10 mrg
1061 1.10 mrg int
1062 1.10 mrg gomp_pause_host (void)
1063 1.10 mrg {
1064 1.10 mrg struct gomp_thread *thr = gomp_thread ();
1065 1.10 mrg struct gomp_thread_pool *pool = thr->thread_pool;
1066 1.10 mrg if (thr->ts.level)
1067 1.10 mrg return -1;
1068 1.10 mrg if (pool)
1069 1.10 mrg {
1070 1.10 mrg if (pool->threads_used > 0)
1071 1.10 mrg {
1072 1.10 mrg int i;
1073 1.10 mrg pthread_t *thrs
1074 1.10 mrg = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
1075 1.10 mrg for (i = 1; i < pool->threads_used; i++)
1076 1.10 mrg {
1077 1.10 mrg struct gomp_thread *nthr = pool->threads[i];
1078 1.10 mrg nthr->fn = gomp_pause_pool_helper;
1079 1.10 mrg nthr->data = pool;
1080 1.10 mrg thrs[i] = gomp_thread_to_pthread_t (nthr);
1081 1.10 mrg }
1082 1.10 mrg /* This barrier undocks threads docked on pool->threads_dock. */
1083 1.10 mrg gomp_simple_barrier_wait (&pool->threads_dock);
1084 1.10 mrg /* And this waits till all threads have called gomp_barrier_wait_last
1085 1.10 mrg in gomp_pause_pool_helper. */
1086 1.10 mrg gomp_simple_barrier_wait (&pool->threads_dock);
1087 1.10 mrg /* Now it is safe to destroy the barrier and free the pool. */
1088 1.10 mrg gomp_simple_barrier_destroy (&pool->threads_dock);
1089 1.10 mrg
1090 1.10 mrg #ifdef HAVE_SYNC_BUILTINS
1091 1.10 mrg __sync_fetch_and_add (&gomp_managed_threads,
1092 1.10 mrg 1L - pool->threads_used);
1093 1.10 mrg #else
1094 1.10 mrg gomp_mutex_lock (&gomp_managed_threads_lock);
1095 1.10 mrg gomp_managed_threads -= pool->threads_used - 1L;
1096 1.10 mrg gomp_mutex_unlock (&gomp_managed_threads_lock);
1097 1.10 mrg #endif
1098 1.10 mrg for (i = 1; i < pool->threads_used; i++)
1099 1.10 mrg pthread_join (thrs[i], NULL);
1100 1.10 mrg }
1101 1.10 mrg if (pool->last_team)
1102 1.10 mrg free_team (pool->last_team);
1103 1.10 mrg #ifndef __nvptx__
1104 1.11 mrg team_free (pool->threads);
1105 1.11 mrg team_free (pool);
1106 1.10 mrg #endif
1107 1.10 mrg thr->thread_pool = NULL;
1108 1.10 mrg }
1109 1.10 mrg return 0;
1110 1.10 mrg }
1111 1.8 mrg #endif
1112 1.1 mrg
1113 1.1 mrg struct gomp_task_icv *
1114 1.1 mrg gomp_new_icv (void)
1115 1.1 mrg {
1116 1.1 mrg struct gomp_thread *thr = gomp_thread ();
1117 1.1 mrg struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
1118 1.1 mrg gomp_init_task (task, NULL, &gomp_global_icv);
1119 1.1 mrg thr->task = task;
1120 1.8 mrg #ifdef LIBGOMP_USE_PTHREADS
1121 1.1 mrg pthread_setspecific (gomp_thread_destructor, thr);
1122 1.8 mrg #endif
1123 1.1 mrg return &task->icv;
1124 1.1 mrg }
1125