/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};
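
/* Note: instances of this structure live on the launching thread's stack
   (see the gomp_alloca call in gomp_team_start); the spawned threads copy
   out what they need in gomp_thread_start before the startup barrier
   releases the launcher, so no heap allocation is required.  */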


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
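      /* Worker loop: run the region we were handed, then park on the
         dock barrier until the master either supplies a new FN (team
         reuse in gomp_team_start) or leaves it NULL, which tells this
         thread to exit.  */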
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

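/* If the thread pool's cached last team is idle and was sized for exactly
   NTHREADS threads, take ownership of it for reuse; otherwise return NULL
   so the caller allocates a fresh team.  Reuse only applies outside of a
   parallel region (thr->ts.team == NULL).  */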
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = team_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
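  /* The team embeds an inline array of 8 work shares (matching
     work_share_chunk above): slot 0 is initialized for immediate use
     and slots 1 through 7 are chained onto the allocation free list.  */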
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

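/* Tear-down entry point handed (via thr->fn) to each thread of a pool
   that is being freed: rendezvous on the dock barrier one last time,
   release per-thread state and terminate the thread.  */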
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
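  /* We are nested iff this thread is already running inside a team,
     i.e. its nesting level is non-zero.  */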
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
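  /* BIND is the policy used to place this team's threads (a proc_bind
     clause passed in FLAGS overrides the ICV); BIND_VAR, possibly
     further overridden by the per-nesting-level OMP_PROC_BIND list, is
     what the team's implicit tasks inherit.  */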
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REM threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
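              /* If the pool thread currently in slot I is not already
                 bound within the place (or subpartition, for
                 omp_proc_bind_true) just computed, pull a suitably
                 placed thread out of the AFFINITY_THR chains instead.  */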
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }


  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     Because #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}


#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but don't detach itself,
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
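  /* Pausing is only permitted from the initial, non-nested state;
     inside a parallel region report failure instead.  */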
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif
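
/* Set up a fresh implicit task carrying a copy of the global ICVs for a
   thread that does not yet have one (e.g. the initial thread, or a
   foreign thread entering libgomp), and register the thread with the
   destructor key so its pool is torn down at thread exit.  */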
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}