/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  unsigned int num_teams;
  unsigned int team_num;
  bool nested;
  pthread_t handle;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
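  /* Note that DATA points into a gomp_alloca'd block in the frame of
     gomp_team_start, which may be released as soon as the barriers
     below are passed; anything needed afterwards must be copied out
     of it first.  */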
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
  thr->num_teams = data->num_teams;
  thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
  pthread_setspecific (gomp_tls_key, thr);
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_simple_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
	{
	  pool->last_team = NULL;
	  return last_team;
	}
    }
  return NULL;
}

/* Create a new team data structure.  */
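
/* A team is allocated as one block: the gomp_team itself, whose trailing
   implicit_task[] flexible array gets NTHREADS entries, followed by the
   NTHREADS semaphore pointers that ordered_release is pointed at; hence
   the EXTRA bytes per thread in the size computation below.  */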

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      team = gomp_aligned_alloc (__alignof (struct gomp_team),
				 sizeof (*team) + nthreads * extra);
#else
      team = team_malloc (sizeof (*team) + nthreads * extra);
#endif

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  team->task_detach_count = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */
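
/* This is also registered as the destructor of the gomp_thread_destructor
   pthread key (see initialize_team below), so it additionally runs at exit
   of any thread that registered itself via pthread_setspecific, e.g. from
   gomp_new_icv; hence the ignored ARG.  */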

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team,
		 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data = NULL;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
				      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REST threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
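	      /* For example (illustrative numbers): with a partition of
		 10 places starting at place 0 and T = 4 threads, the code
		 below computes s = 2 and subpartitions of 3, 3, 2 and 2
		 places, so the four threads end up on places 0, 3, 6 and
		 8 respectively.  */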
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));
	  /* Add current (master) thread to threads[].  */
	  pool->threads[0] = thr;
	}

      /* Release existing idle threads.  */
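      /* For example (illustrative numbers): reusing idle threads with
	 bind close and T = 8 threads over a partition of 3 places, the
	 setup above gives s = 2 and rest = 2, so the loop below first
	 assigns two threads per place and then wraps around once,
	 leaving the three places with 3, 3 and 2 threads.  */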
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  force_display = true;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
	  nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->num_teams = thr->num_teams;
	  nthr->team_num = thr->team_num;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->task->taskgroup = taskgroup;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_simple_barrier_reinit (&pool->threads_dock,
					    nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->num_teams = thr->num_teams;
      start_data->team_num = thr->team_num;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
			    start_data);
      start_data++;
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT if non-zero will always be at least
     OLD_THREADS_USED - NTHREADS.  */
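  /* For example (illustrative numbers): with old_threads_used = 8,
     nthreads = 4 and affinity_count = 5, the dock barrier was
     temporarily re-initialized above for 4 + 5 = 9 arrivals; here it
     is reset to 4 and gomp_managed_threads is decreased by 5.  */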
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
	  || nthreads != old_threads_used
	  || force_display)
	{
	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
					thr->place);
	  if (nested)
	    {
	      start_data -= nthreads - 1;
	      for (i = 1; i < nthreads; ++i)
		{
		  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
						start_data->handle,
#else
						gomp_thread_self (),
#endif
						&start_data->ts,
						start_data->place);
		  start_data++;
		}
	    }
	  else
	    {
	      for (i = 1; i < nthreads; ++i)
		{
		  gomp_thread_handle handle
		    = gomp_thread_to_pthread_t (pool->threads[i]);
		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
						pool->threads[i]->place);
		}
	    }
	}
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but doesn't detach itself;
   gomp_pause_host will pthread_join these threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  pthread_t *thrs
	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_pause_pool_helper;
	      nthr->data = pool;
	      thrs[i] = gomp_thread_to_pthread_t (nthr);
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_pause_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	  for (i = 1; i < pool->threads_used; i++)
	    pthread_join (thrs[i], NULL);
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}