Home | History | Annotate | Line # | Download | only in libgomp
      1  1.12  mrg /* Copyright (C) 2005-2022 Free Software Foundation, Inc.
      2   1.1  mrg    Contributed by Richard Henderson <rth (at) redhat.com>.
      3   1.1  mrg 
      4   1.5  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5   1.5  mrg    (libgomp).
      6   1.1  mrg 
      7   1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8   1.1  mrg    under the terms of the GNU General Public License as published by
      9   1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10   1.1  mrg    any later version.
     11   1.1  mrg 
     12   1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13   1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14   1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15   1.1  mrg    more details.
     16   1.1  mrg 
     17   1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18   1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19   1.1  mrg    3.1, as published by the Free Software Foundation.
     20   1.1  mrg 
     21   1.1  mrg    You should have received a copy of the GNU General Public License and
     22   1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23   1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24   1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25   1.1  mrg 
     26  1.11  mrg /* This file handles the maintenance of threads in response to team
     27   1.1  mrg    creation and termination.  */
     28   1.1  mrg 
     29   1.1  mrg #include "libgomp.h"
     30   1.6  mrg #include "pool.h"
     31   1.1  mrg #include <stdlib.h>
     32   1.1  mrg #include <string.h>
     33   1.1  mrg 
     34   1.8  mrg #ifdef LIBGOMP_USE_PTHREADS
     35   1.1  mrg pthread_attr_t gomp_thread_attr;
     36   1.1  mrg 
     37   1.1  mrg /* This key is for the thread destructor.  */
     38   1.1  mrg pthread_key_t gomp_thread_destructor;
     39   1.1  mrg 
     40   1.1  mrg 
     41   1.1  mrg /* This is the libgomp per-thread data structure.  */
     42   1.5  mrg #if defined HAVE_TLS || defined USE_EMUTLS
     43   1.1  mrg __thread struct gomp_thread gomp_tls_data;
     44   1.1  mrg #else
     45   1.1  mrg pthread_key_t gomp_tls_key;
     46   1.1  mrg #endif
     47   1.1  mrg 
     48   1.1  mrg 
/* This structure is used to communicate across pthread_create.  It is
   filled in by the team launcher and read exactly once by the new thread
   in gomp_thread_start, which copies the fields it needs into its own
   per-thread gomp_thread structure.  */

struct gomp_thread_start_data
{
  /* Initial function the new thread runs, and its argument.  */
  void (*fn) (void *);
  void *fn_data;
  /* Team state (team pointer, team_id, level, place partition, ...)
     copied wholesale into the new thread.  */
  struct gomp_team_state ts;
  /* Implicit task assigned to the new thread.  */
  struct gomp_task *task;
  /* Thread pool the new thread docks in between parallel regions
     (used only on the non-nested path).  */
  struct gomp_thread_pool *thread_pool;
  /* Place number for affinity; gomp_team_start passes 0 when no
     places list is in effect.  */
  unsigned int place;
  /* omp_get_num_teams () / omp_get_team_num () values propagated to
     the new thread (see gomp_thread_start).  */
  unsigned int num_teams;
  unsigned int team_num;
  /* True when this thread is created for a nested team; such threads
     terminate after the region instead of docking in the pool.  */
  bool nested;
  /* pthread handle, consumed only when GOMP_NEEDS_THREAD_HANDLE.  */
  pthread_t handle;
};
     64   1.1  mrg 
     65   1.1  mrg 
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

  /* Locate this thread's gomp_thread: native/emulated TLS when
     available, otherwise a stack copy published via pthread_setspecific
     below.  */
#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  NOTE(review): DATA presumably lives
     on the launcher's stack, so it must not be referenced after the
     barrier waits below release the launcher -- all reads happen here.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
  thr->num_teams = data->num_teams;
  thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
  pthread_setspecific (gomp_tls_key, thr);
#endif

  /* Publish our release semaphore so ordered sections can wake us.  */
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      /* Nested team member: run exactly one region, then finish the
	 implicit task and terminate.  */
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      /* Rendezvous with the launcher before touching team state.  */
      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      /* Last wait lets the launcher safely tear the team down.  */
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Pool thread: register ourselves, then loop -- run a region,
	 dock on threads_dock until handed a new FN, repeat until
	 released with a NULL FN.  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  /* Dock until the master re-launches us with new work.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);

	  /* thr->fn/thr->data were filled in by the master while we
	     were docked; a NULL fn means "exit".  */
	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  /* Thread exit: nothing joins us, so detach to reclaim resources.  */
  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
    148   1.8  mrg #endif
    149   1.1  mrg 
    150   1.6  mrg static inline struct gomp_team *
    151   1.6  mrg get_last_team (unsigned nthreads)
    152   1.6  mrg {
    153   1.6  mrg   struct gomp_thread *thr = gomp_thread ();
    154   1.6  mrg   if (thr->ts.team == NULL)
    155   1.6  mrg     {
    156   1.6  mrg       struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
    157   1.6  mrg       struct gomp_team *last_team = pool->last_team;
    158   1.6  mrg       if (last_team != NULL && last_team->nthreads == nthreads)
    159   1.6  mrg         {
    160   1.6  mrg           pool->last_team = NULL;
    161   1.6  mrg           return last_team;
    162   1.6  mrg         }
    163   1.6  mrg     }
    164   1.6  mrg   return NULL;
    165   1.6  mrg }
    166   1.1  mrg 
/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  /* Try to reuse the team cached in the thread pool; allocate and do
     one-time initialization only on a cache miss.  */
  team = get_last_team (nthreads);
  if (team == NULL)
    {
      /* The team is allocated with two trailing per-thread arrays:
	 implicit_task[nthreads] followed by ordered_release[nthreads]
	 (addressed through ordered_release below).  */
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      team = gomp_aligned_alloc (__alignof (struct gomp_team),
				 sizeof (*team) + nthreads * extra);
#else
      team = team_malloc (sizeof (*team) + nthreads * extra);
#endif

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  /* Per-region initialization: runs for both fresh and reused teams.  */
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  /* work_shares[0] is active immediately; work_shares[1..7] are chained
     into the allocation free list for later constructs.  */
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  /* ordered_release points into the trailing storage right after
     implicit_task[nthreads]; slot 0 is the master's semaphore.  */
  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  team->task_detach_count = 0;

  return team;
}
    224   1.1  mrg 
    225   1.1  mrg 
/* Free a team data structure.  Destroys the synchronization objects
   created in gomp_new_team (in the same conditional pattern) before
   releasing the single allocation that holds the team and its trailing
   per-thread arrays.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}
    239   1.1  mrg 
/* Helper installed as the docked threads' FN by gomp_free_thread when a
   pool is being torn down.  Each pool thread runs this, signals the last
   barrier wait so the destroyer can proceed, cleans up its per-thread
   state and terminates itself in a target-specific way.  */

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  /* Matches the second gomp_simple_barrier_wait in gomp_free_thread;
     after this the destroyer may free the pool.  */
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  /* Nobody joins pool threads; detach before exiting.  */
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  /* Flush the scalar cache before ending the wavefront.  */
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}
    262   1.1  mrg 
/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  /* Hand every docked thread (slots 1..used-1; slot 0 is this
	     thread) the self-termination helper as its next FN.  */
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

	  /* Account for the exiting pool threads in the global count
	     (this thread itself stays, hence the - 1).  */
#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
      /* NOTE(review): on nvptx the pool storage is apparently not
	 individually freeable, hence the guard -- confirm against the
	 nvptx pool allocation in pool.h.  */
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  /* If called while still inside an outermost team, close it first.  */
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
    315   1.1  mrg 
    316   1.1  mrg /* Launch a team.  */
    317   1.1  mrg 
    318   1.8  mrg #ifdef LIBGOMP_USE_PTHREADS
    319   1.1  mrg void
    320   1.1  mrg gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    321  1.10  mrg 		 unsigned flags, struct gomp_team *team,
    322  1.10  mrg 		 struct gomp_taskgroup *taskgroup)
    323   1.1  mrg {
    324  1.12  mrg   struct gomp_thread_start_data *start_data = NULL;
    325   1.1  mrg   struct gomp_thread *thr, *nthr;
    326   1.1  mrg   struct gomp_task *task;
    327   1.1  mrg   struct gomp_task_icv *icv;
    328   1.1  mrg   bool nested;
    329   1.1  mrg   struct gomp_thread_pool *pool;
    330   1.1  mrg   unsigned i, n, old_threads_used = 0;
    331   1.1  mrg   pthread_attr_t thread_attr, *attr;
    332   1.3  mrg   unsigned long nthreads_var;
    333   1.5  mrg   char bind, bind_var;
    334   1.5  mrg   unsigned int s = 0, rest = 0, p = 0, k = 0;
    335   1.5  mrg   unsigned int affinity_count = 0;
    336   1.5  mrg   struct gomp_thread **affinity_thr = NULL;
    337  1.10  mrg   bool force_display = false;
    338   1.1  mrg 
    339   1.1  mrg   thr = gomp_thread ();
    340   1.6  mrg   nested = thr->ts.level;
    341   1.1  mrg   pool = thr->thread_pool;
    342   1.1  mrg   task = thr->task;
    343   1.1  mrg   icv = task ? &task->icv : &gomp_global_icv;
    344   1.5  mrg   if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    345  1.10  mrg     {
    346  1.10  mrg       gomp_init_affinity ();
    347  1.10  mrg       if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
    348  1.10  mrg 	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
    349  1.10  mrg 				      thr->place);
    350  1.10  mrg     }
    351   1.1  mrg 
    352   1.1  mrg   /* Always save the previous state, even if this isn't a nested team.
    353   1.1  mrg      In particular, we should save any work share state from an outer
    354   1.1  mrg      orphaned work share construct.  */
    355   1.1  mrg   team->prev_ts = thr->ts;
    356   1.1  mrg 
    357   1.1  mrg   thr->ts.team = team;
    358   1.1  mrg   thr->ts.team_id = 0;
    359   1.1  mrg   ++thr->ts.level;
    360   1.1  mrg   if (nthreads > 1)
    361   1.1  mrg     ++thr->ts.active_level;
    362   1.1  mrg   thr->ts.work_share = &team->work_shares[0];
    363   1.1  mrg   thr->ts.last_work_share = NULL;
    364   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    365   1.1  mrg   thr->ts.single_count = 0;
    366   1.1  mrg #endif
    367   1.1  mrg   thr->ts.static_trip = 0;
    368   1.1  mrg   thr->task = &team->implicit_task[0];
    369  1.10  mrg #ifdef GOMP_NEEDS_THREAD_HANDLE
    370  1.10  mrg   thr->handle = pthread_self ();
    371  1.10  mrg #endif
    372   1.3  mrg   nthreads_var = icv->nthreads_var;
    373   1.3  mrg   if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
    374   1.3  mrg       && thr->ts.level < gomp_nthreads_var_list_len)
    375   1.3  mrg     nthreads_var = gomp_nthreads_var_list[thr->ts.level];
    376   1.5  mrg   bind_var = icv->bind_var;
    377   1.5  mrg   if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    378   1.5  mrg     bind_var = flags & 7;
    379   1.5  mrg   bind = bind_var;
    380   1.5  mrg   if (__builtin_expect (gomp_bind_var_list != NULL, 0)
    381   1.5  mrg       && thr->ts.level < gomp_bind_var_list_len)
    382   1.5  mrg     bind_var = gomp_bind_var_list[thr->ts.level];
    383   1.1  mrg   gomp_init_task (thr->task, task, icv);
    384  1.10  mrg   thr->task->taskgroup = taskgroup;
    385   1.3  mrg   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    386   1.5  mrg   team->implicit_task[0].icv.bind_var = bind_var;
    387   1.1  mrg 
    388   1.1  mrg   if (nthreads == 1)
    389   1.1  mrg     return;
    390   1.1  mrg 
    391   1.1  mrg   i = 1;
    392   1.1  mrg 
    393   1.5  mrg   if (__builtin_expect (gomp_places_list != NULL, 0))
    394   1.5  mrg     {
    395   1.5  mrg       /* Depending on chosen proc_bind model, set subpartition
    396   1.5  mrg 	 for the master thread and initialize helper variables
    397   1.5  mrg 	 P and optionally S, K and/or REST used by later place
    398   1.5  mrg 	 computation for each additional thread.  */
    399   1.5  mrg       p = thr->place - 1;
    400   1.5  mrg       switch (bind)
    401   1.5  mrg 	{
    402   1.5  mrg 	case omp_proc_bind_true:
    403   1.5  mrg 	case omp_proc_bind_close:
    404   1.5  mrg 	  if (nthreads > thr->ts.place_partition_len)
    405   1.5  mrg 	    {
    406   1.5  mrg 	      /* T > P.  S threads will be placed in each place,
    407   1.5  mrg 		 and the final REM threads placed one by one
    408   1.5  mrg 		 into the already occupied places.  */
    409   1.5  mrg 	      s = nthreads / thr->ts.place_partition_len;
    410   1.5  mrg 	      rest = nthreads % thr->ts.place_partition_len;
    411   1.5  mrg 	    }
    412   1.5  mrg 	  else
    413   1.5  mrg 	    s = 1;
    414   1.5  mrg 	  k = 1;
    415   1.5  mrg 	  break;
    416   1.5  mrg 	case omp_proc_bind_master:
    417   1.5  mrg 	  /* Each thread will be bound to master's place.  */
    418   1.5  mrg 	  break;
    419   1.5  mrg 	case omp_proc_bind_spread:
    420   1.5  mrg 	  if (nthreads <= thr->ts.place_partition_len)
    421   1.5  mrg 	    {
    422   1.5  mrg 	      /* T <= P.  Each subpartition will have in between s
    423   1.5  mrg 		 and s+1 places (subpartitions starting at or
    424   1.5  mrg 		 after rest will have s places, earlier s+1 places),
    425   1.5  mrg 		 each thread will be bound to the first place in
    426   1.5  mrg 		 its subpartition (except for the master thread
    427   1.5  mrg 		 that can be bound to another place in its
    428   1.5  mrg 		 subpartition).  */
    429   1.5  mrg 	      s = thr->ts.place_partition_len / nthreads;
    430   1.5  mrg 	      rest = thr->ts.place_partition_len % nthreads;
    431   1.5  mrg 	      rest = (s + 1) * rest + thr->ts.place_partition_off;
    432   1.5  mrg 	      if (p < rest)
    433   1.5  mrg 		{
    434   1.5  mrg 		  p -= (p - thr->ts.place_partition_off) % (s + 1);
    435   1.5  mrg 		  thr->ts.place_partition_len = s + 1;
    436   1.5  mrg 		}
    437   1.5  mrg 	      else
    438   1.5  mrg 		{
    439   1.5  mrg 		  p -= (p - rest) % s;
    440   1.5  mrg 		  thr->ts.place_partition_len = s;
    441   1.5  mrg 		}
    442   1.5  mrg 	      thr->ts.place_partition_off = p;
    443   1.5  mrg 	    }
    444   1.5  mrg 	  else
    445   1.5  mrg 	    {
    446   1.5  mrg 	      /* T > P.  Each subpartition will have just a single
    447   1.5  mrg 		 place and we'll place between s and s+1
    448   1.5  mrg 		 threads into each subpartition.  */
    449   1.5  mrg 	      s = nthreads / thr->ts.place_partition_len;
    450   1.5  mrg 	      rest = nthreads % thr->ts.place_partition_len;
    451   1.5  mrg 	      thr->ts.place_partition_off = p;
    452   1.5  mrg 	      thr->ts.place_partition_len = 1;
    453   1.5  mrg 	      k = 1;
    454   1.5  mrg 	    }
    455   1.5  mrg 	  break;
    456   1.5  mrg 	}
    457   1.5  mrg     }
    458   1.5  mrg   else
    459   1.5  mrg     bind = omp_proc_bind_false;
    460   1.5  mrg 
    461   1.1  mrg   /* We only allow the reuse of idle threads for non-nested PARALLEL
    462   1.1  mrg      regions.  This appears to be implied by the semantics of
    463   1.1  mrg      threadprivate variables, but perhaps that's reading too much into
    464   1.1  mrg      things.  Certainly it does prevent any locking problems, since
    465   1.1  mrg      only the initial program thread will modify gomp_threads.  */
    466   1.1  mrg   if (!nested)
    467   1.1  mrg     {
    468   1.1  mrg       old_threads_used = pool->threads_used;
    469   1.1  mrg 
    470   1.1  mrg       if (nthreads <= old_threads_used)
    471   1.1  mrg 	n = nthreads;
    472   1.1  mrg       else if (old_threads_used == 0)
    473   1.1  mrg 	{
    474   1.1  mrg 	  n = 0;
    475   1.8  mrg 	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
    476   1.1  mrg 	}
    477   1.1  mrg       else
    478   1.1  mrg 	{
    479   1.1  mrg 	  n = old_threads_used;
    480   1.1  mrg 
    481   1.1  mrg 	  /* Increase the barrier threshold to make sure all new
    482   1.1  mrg 	     threads arrive before the team is released.  */
    483   1.8  mrg 	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
    484   1.1  mrg 	}
    485   1.1  mrg 
    486   1.1  mrg       /* Not true yet, but soon will be.  We're going to release all
    487   1.1  mrg 	 threads from the dock, and those that aren't part of the
    488   1.1  mrg 	 team will exit.  */
    489   1.1  mrg       pool->threads_used = nthreads;
    490   1.1  mrg 
    491   1.5  mrg       /* If necessary, expand the size of the gomp_threads array.  It is
    492   1.5  mrg 	 expected that changes in the number of threads are rare, thus we
    493   1.5  mrg 	 make no effort to expand gomp_threads_size geometrically.  */
    494   1.5  mrg       if (nthreads >= pool->threads_size)
    495   1.5  mrg 	{
    496   1.5  mrg 	  pool->threads_size = nthreads + 1;
    497   1.5  mrg 	  pool->threads
    498   1.5  mrg 	    = gomp_realloc (pool->threads,
    499   1.5  mrg 			    pool->threads_size
    500  1.10  mrg 			    * sizeof (struct gomp_thread *));
    501  1.10  mrg 	  /* Add current (master) thread to threads[].  */
    502  1.10  mrg 	  pool->threads[0] = thr;
    503   1.5  mrg 	}
    504   1.5  mrg 
    505   1.1  mrg       /* Release existing idle threads.  */
    506   1.1  mrg       for (; i < n; ++i)
    507   1.1  mrg 	{
    508   1.5  mrg 	  unsigned int place_partition_off = thr->ts.place_partition_off;
    509   1.5  mrg 	  unsigned int place_partition_len = thr->ts.place_partition_len;
    510   1.5  mrg 	  unsigned int place = 0;
    511   1.5  mrg 	  if (__builtin_expect (gomp_places_list != NULL, 0))
    512   1.5  mrg 	    {
    513   1.5  mrg 	      switch (bind)
    514   1.5  mrg 		{
    515   1.5  mrg 		case omp_proc_bind_true:
    516   1.5  mrg 		case omp_proc_bind_close:
    517   1.5  mrg 		  if (k == s)
    518   1.5  mrg 		    {
    519   1.5  mrg 		      ++p;
    520   1.5  mrg 		      if (p == (team->prev_ts.place_partition_off
    521   1.5  mrg 				+ team->prev_ts.place_partition_len))
    522   1.5  mrg 			p = team->prev_ts.place_partition_off;
    523   1.5  mrg 		      k = 1;
    524   1.5  mrg 		      if (i == nthreads - rest)
    525   1.5  mrg 			s = 1;
    526   1.5  mrg 		    }
    527   1.5  mrg 		  else
    528   1.5  mrg 		    ++k;
    529   1.5  mrg 		  break;
    530   1.5  mrg 		case omp_proc_bind_master:
    531   1.5  mrg 		  break;
    532   1.5  mrg 		case omp_proc_bind_spread:
    533   1.5  mrg 		  if (k == 0)
    534   1.5  mrg 		    {
    535   1.5  mrg 		      /* T <= P.  */
    536   1.5  mrg 		      if (p < rest)
    537   1.5  mrg 			p += s + 1;
    538   1.5  mrg 		      else
    539   1.5  mrg 			p += s;
    540   1.5  mrg 		      if (p == (team->prev_ts.place_partition_off
    541   1.5  mrg 				+ team->prev_ts.place_partition_len))
    542   1.5  mrg 			p = team->prev_ts.place_partition_off;
    543   1.5  mrg 		      place_partition_off = p;
    544   1.5  mrg 		      if (p < rest)
    545   1.5  mrg 			place_partition_len = s + 1;
    546   1.5  mrg 		      else
    547   1.5  mrg 			place_partition_len = s;
    548   1.5  mrg 		    }
    549   1.5  mrg 		  else
    550   1.5  mrg 		    {
    551   1.5  mrg 		      /* T > P.  */
    552   1.5  mrg 		      if (k == s)
    553   1.5  mrg 			{
    554   1.5  mrg 			  ++p;
    555   1.5  mrg 			  if (p == (team->prev_ts.place_partition_off
    556   1.5  mrg 				    + team->prev_ts.place_partition_len))
    557   1.5  mrg 			    p = team->prev_ts.place_partition_off;
    558   1.5  mrg 			  k = 1;
    559   1.5  mrg 			  if (i == nthreads - rest)
    560   1.5  mrg 			    s = 1;
    561   1.5  mrg 			}
    562   1.5  mrg 		      else
    563   1.5  mrg 			++k;
    564   1.5  mrg 		      place_partition_off = p;
    565   1.5  mrg 		      place_partition_len = 1;
    566   1.5  mrg 		    }
    567   1.5  mrg 		  break;
    568   1.5  mrg 		}
    569   1.5  mrg 	      if (affinity_thr != NULL
    570   1.5  mrg 		  || (bind != omp_proc_bind_true
    571   1.5  mrg 		      && pool->threads[i]->place != p + 1)
    572   1.5  mrg 		  || pool->threads[i]->place <= place_partition_off
    573   1.5  mrg 		  || pool->threads[i]->place > (place_partition_off
    574   1.5  mrg 						+ place_partition_len))
    575   1.5  mrg 		{
    576   1.5  mrg 		  unsigned int l;
    577  1.10  mrg 		  force_display = true;
    578   1.5  mrg 		  if (affinity_thr == NULL)
    579   1.5  mrg 		    {
    580   1.5  mrg 		      unsigned int j;
    581   1.5  mrg 
    582   1.5  mrg 		      if (team->prev_ts.place_partition_len > 64)
    583   1.5  mrg 			affinity_thr
    584   1.5  mrg 			  = gomp_malloc (team->prev_ts.place_partition_len
    585   1.5  mrg 					 * sizeof (struct gomp_thread *));
    586   1.5  mrg 		      else
    587   1.5  mrg 			affinity_thr
    588   1.5  mrg 			  = gomp_alloca (team->prev_ts.place_partition_len
    589   1.5  mrg 					 * sizeof (struct gomp_thread *));
    590   1.5  mrg 		      memset (affinity_thr, '\0',
    591   1.5  mrg 			      team->prev_ts.place_partition_len
    592   1.5  mrg 			      * sizeof (struct gomp_thread *));
    593   1.5  mrg 		      for (j = i; j < old_threads_used; j++)
    594   1.5  mrg 			{
    595   1.5  mrg 			  if (pool->threads[j]->place
    596   1.5  mrg 			      > team->prev_ts.place_partition_off
    597   1.5  mrg 			      && (pool->threads[j]->place
    598   1.5  mrg 				  <= (team->prev_ts.place_partition_off
    599   1.5  mrg 				      + team->prev_ts.place_partition_len)))
    600   1.5  mrg 			    {
    601   1.5  mrg 			      l = pool->threads[j]->place - 1
    602   1.5  mrg 				  - team->prev_ts.place_partition_off;
    603   1.5  mrg 			      pool->threads[j]->data = affinity_thr[l];
    604   1.5  mrg 			      affinity_thr[l] = pool->threads[j];
    605   1.5  mrg 			    }
    606   1.5  mrg 			  pool->threads[j] = NULL;
    607   1.5  mrg 			}
    608   1.5  mrg 		      if (nthreads > old_threads_used)
    609   1.5  mrg 			memset (&pool->threads[old_threads_used],
    610   1.5  mrg 				'\0', ((nthreads - old_threads_used)
    611   1.5  mrg 				       * sizeof (struct gomp_thread *)));
    612   1.5  mrg 		      n = nthreads;
    613   1.5  mrg 		      affinity_count = old_threads_used - i;
    614   1.5  mrg 		    }
    615   1.5  mrg 		  if (affinity_count == 0)
    616   1.5  mrg 		    break;
    617   1.5  mrg 		  l = p;
    618   1.5  mrg 		  if (affinity_thr[l - team->prev_ts.place_partition_off]
    619   1.5  mrg 		      == NULL)
    620   1.5  mrg 		    {
    621   1.5  mrg 		      if (bind != omp_proc_bind_true)
    622   1.5  mrg 			continue;
    623   1.5  mrg 		      for (l = place_partition_off;
    624   1.5  mrg 			   l < place_partition_off + place_partition_len;
    625   1.5  mrg 			   l++)
    626   1.5  mrg 			if (affinity_thr[l - team->prev_ts.place_partition_off]
    627   1.5  mrg 			    != NULL)
    628   1.5  mrg 			  break;
    629   1.5  mrg 		      if (l == place_partition_off + place_partition_len)
    630   1.5  mrg 			continue;
    631   1.5  mrg 		    }
    632   1.5  mrg 		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
    633   1.5  mrg 		  affinity_thr[l - team->prev_ts.place_partition_off]
    634   1.5  mrg 		    = (struct gomp_thread *) nthr->data;
    635   1.5  mrg 		  affinity_count--;
    636   1.5  mrg 		  pool->threads[i] = nthr;
    637   1.5  mrg 		}
    638   1.5  mrg 	      else
    639   1.5  mrg 		nthr = pool->threads[i];
    640   1.5  mrg 	      place = p + 1;
    641   1.5  mrg 	    }
    642   1.5  mrg 	  else
    643   1.5  mrg 	    nthr = pool->threads[i];
    644   1.1  mrg 	  nthr->ts.team = team;
    645   1.1  mrg 	  nthr->ts.work_share = &team->work_shares[0];
    646   1.1  mrg 	  nthr->ts.last_work_share = NULL;
    647   1.1  mrg 	  nthr->ts.team_id = i;
    648   1.1  mrg 	  nthr->ts.level = team->prev_ts.level + 1;
    649   1.1  mrg 	  nthr->ts.active_level = thr->ts.active_level;
    650   1.5  mrg 	  nthr->ts.place_partition_off = place_partition_off;
    651   1.5  mrg 	  nthr->ts.place_partition_len = place_partition_len;
    652  1.12  mrg 	  nthr->ts.def_allocator = thr->ts.def_allocator;
    653   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    654   1.1  mrg 	  nthr->ts.single_count = 0;
    655   1.1  mrg #endif
    656   1.1  mrg 	  nthr->ts.static_trip = 0;
    657  1.12  mrg 	  nthr->num_teams = thr->num_teams;
    658  1.12  mrg 	  nthr->team_num = thr->team_num;
    659   1.1  mrg 	  nthr->task = &team->implicit_task[i];
    660   1.5  mrg 	  nthr->place = place;
    661   1.1  mrg 	  gomp_init_task (nthr->task, task, icv);
    662   1.3  mrg 	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
    663   1.5  mrg 	  team->implicit_task[i].icv.bind_var = bind_var;
    664  1.10  mrg 	  nthr->task->taskgroup = taskgroup;
    665   1.1  mrg 	  nthr->fn = fn;
    666   1.1  mrg 	  nthr->data = data;
    667   1.1  mrg 	  team->ordered_release[i] = &nthr->release;
    668   1.1  mrg 	}
    669   1.1  mrg 
    670   1.5  mrg       if (__builtin_expect (affinity_thr != NULL, 0))
    671   1.5  mrg 	{
    672   1.5  mrg 	  /* If AFFINITY_THR is non-NULL just because we had to
    673   1.5  mrg 	     permute some threads in the pool, but we've managed
    674   1.5  mrg 	     to find exactly as many old threads as we'd find
    675   1.5  mrg 	     without affinity, we don't need to handle this
    676   1.5  mrg 	     specially anymore.  */
    677   1.5  mrg 	  if (nthreads <= old_threads_used
    678   1.5  mrg 	      ? (affinity_count == old_threads_used - nthreads)
    679   1.5  mrg 	      : (i == old_threads_used))
    680   1.5  mrg 	    {
    681   1.5  mrg 	      if (team->prev_ts.place_partition_len > 64)
    682   1.5  mrg 		free (affinity_thr);
    683   1.5  mrg 	      affinity_thr = NULL;
    684   1.5  mrg 	      affinity_count = 0;
    685   1.5  mrg 	    }
    686   1.5  mrg 	  else
    687   1.5  mrg 	    {
    688   1.5  mrg 	      i = 1;
    689   1.5  mrg 	      /* We are going to compute the places/subpartitions
    690   1.5  mrg 		 again from the beginning.  So, we need to reinitialize
    691   1.5  mrg 		 vars modified by the switch (bind) above inside
    692   1.5  mrg 		 of the loop, to the state they had after the initial
    693   1.5  mrg 		 switch (bind).  */
    694   1.5  mrg 	      switch (bind)
    695   1.5  mrg 		{
    696   1.5  mrg 		case omp_proc_bind_true:
    697   1.5  mrg 		case omp_proc_bind_close:
    698   1.5  mrg 		  if (nthreads > thr->ts.place_partition_len)
    699   1.5  mrg 		    /* T > P.  S has been changed, so needs
    700   1.5  mrg 		       to be recomputed.  */
    701   1.5  mrg 		    s = nthreads / thr->ts.place_partition_len;
    702   1.5  mrg 		  k = 1;
    703   1.5  mrg 		  p = thr->place - 1;
    704   1.5  mrg 		  break;
    705   1.5  mrg 		case omp_proc_bind_master:
    706   1.5  mrg 		  /* No vars have been changed.  */
    707   1.5  mrg 		  break;
    708   1.5  mrg 		case omp_proc_bind_spread:
    709   1.5  mrg 		  p = thr->ts.place_partition_off;
    710   1.5  mrg 		  if (k != 0)
    711   1.5  mrg 		    {
    712   1.5  mrg 		      /* T > P.  */
    713   1.5  mrg 		      s = nthreads / team->prev_ts.place_partition_len;
    714   1.5  mrg 		      k = 1;
    715   1.5  mrg 		    }
    716   1.5  mrg 		  break;
    717   1.5  mrg 		}
    718   1.5  mrg 
    719   1.5  mrg 	      /* Increase the barrier threshold to make sure all new
    720   1.5  mrg 		 threads and all the threads we're going to let die
    721   1.5  mrg 		 arrive before the team is released.  */
    722   1.5  mrg 	      if (affinity_count)
    723   1.8  mrg 		gomp_simple_barrier_reinit (&pool->threads_dock,
    724   1.8  mrg 					    nthreads + affinity_count);
    725   1.5  mrg 	    }
    726   1.5  mrg 	}
    727   1.5  mrg 
    728   1.1  mrg       if (i == nthreads)
    729   1.1  mrg 	goto do_release;
    730   1.1  mrg 
    731   1.1  mrg     }
    732   1.1  mrg 
    733   1.5  mrg   if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    734   1.1  mrg     {
    735   1.5  mrg       long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
    736   1.1  mrg 
    737   1.1  mrg       if (old_threads_used == 0)
    738   1.1  mrg 	--diff;
    739   1.1  mrg 
    740   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    741   1.1  mrg       __sync_fetch_and_add (&gomp_managed_threads, diff);
    742   1.1  mrg #else
    743   1.5  mrg       gomp_mutex_lock (&gomp_managed_threads_lock);
    744   1.1  mrg       gomp_managed_threads += diff;
    745   1.5  mrg       gomp_mutex_unlock (&gomp_managed_threads_lock);
    746   1.1  mrg #endif
    747   1.1  mrg     }
    748   1.1  mrg 
    749   1.1  mrg   attr = &gomp_thread_attr;
    750   1.5  mrg   if (__builtin_expect (gomp_places_list != NULL, 0))
    751   1.1  mrg     {
    752   1.1  mrg       size_t stacksize;
    753   1.1  mrg       pthread_attr_init (&thread_attr);
    754   1.1  mrg       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
    755   1.1  mrg 	pthread_attr_setstacksize (&thread_attr, stacksize);
    756   1.1  mrg       attr = &thread_attr;
    757   1.1  mrg     }
    758   1.1  mrg 
    759   1.1  mrg   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
    760  1.10  mrg 			    * (nthreads - i));
    761   1.1  mrg 
    762   1.1  mrg   /* Launch new threads.  */
    763   1.5  mrg   for (; i < nthreads; ++i)
    764   1.1  mrg     {
    765   1.1  mrg       int err;
    766   1.1  mrg 
    767   1.5  mrg       start_data->ts.place_partition_off = thr->ts.place_partition_off;
    768   1.5  mrg       start_data->ts.place_partition_len = thr->ts.place_partition_len;
    769   1.5  mrg       start_data->place = 0;
    770   1.5  mrg       if (__builtin_expect (gomp_places_list != NULL, 0))
    771   1.5  mrg 	{
    772   1.5  mrg 	  switch (bind)
    773   1.5  mrg 	    {
    774   1.5  mrg 	    case omp_proc_bind_true:
    775   1.5  mrg 	    case omp_proc_bind_close:
    776   1.5  mrg 	      if (k == s)
    777   1.5  mrg 		{
    778   1.5  mrg 		  ++p;
    779   1.5  mrg 		  if (p == (team->prev_ts.place_partition_off
    780   1.5  mrg 			    + team->prev_ts.place_partition_len))
    781   1.5  mrg 		    p = team->prev_ts.place_partition_off;
    782   1.5  mrg 		  k = 1;
    783   1.5  mrg 		  if (i == nthreads - rest)
    784   1.5  mrg 		    s = 1;
    785   1.5  mrg 		}
    786   1.5  mrg 	      else
    787   1.5  mrg 		++k;
    788   1.5  mrg 	      break;
    789   1.5  mrg 	    case omp_proc_bind_master:
    790   1.5  mrg 	      break;
    791   1.5  mrg 	    case omp_proc_bind_spread:
    792   1.5  mrg 	      if (k == 0)
    793   1.5  mrg 		{
    794   1.5  mrg 		  /* T <= P.  */
    795   1.5  mrg 		  if (p < rest)
    796   1.5  mrg 		    p += s + 1;
    797   1.5  mrg 		  else
    798   1.5  mrg 		    p += s;
    799   1.5  mrg 		  if (p == (team->prev_ts.place_partition_off
    800   1.5  mrg 			    + team->prev_ts.place_partition_len))
    801   1.5  mrg 		    p = team->prev_ts.place_partition_off;
    802   1.5  mrg 		  start_data->ts.place_partition_off = p;
    803   1.5  mrg 		  if (p < rest)
    804   1.5  mrg 		    start_data->ts.place_partition_len = s + 1;
    805   1.5  mrg 		  else
    806   1.5  mrg 		    start_data->ts.place_partition_len = s;
    807   1.5  mrg 		}
    808   1.5  mrg 	      else
    809   1.5  mrg 		{
    810   1.5  mrg 		  /* T > P.  */
    811   1.5  mrg 		  if (k == s)
    812   1.5  mrg 		    {
    813   1.5  mrg 		      ++p;
    814   1.5  mrg 		      if (p == (team->prev_ts.place_partition_off
    815   1.5  mrg 				+ team->prev_ts.place_partition_len))
    816   1.5  mrg 			p = team->prev_ts.place_partition_off;
    817   1.5  mrg 		      k = 1;
    818   1.5  mrg 		      if (i == nthreads - rest)
    819   1.5  mrg 			s = 1;
    820   1.5  mrg 		    }
    821   1.5  mrg 		  else
    822   1.5  mrg 		    ++k;
    823   1.5  mrg 		  start_data->ts.place_partition_off = p;
    824   1.5  mrg 		  start_data->ts.place_partition_len = 1;
    825   1.5  mrg 		}
    826   1.5  mrg 	      break;
    827   1.5  mrg 	    }
    828   1.5  mrg 	  start_data->place = p + 1;
    829   1.5  mrg 	  if (affinity_thr != NULL && pool->threads[i] != NULL)
    830   1.5  mrg 	    continue;
    831   1.5  mrg 	  gomp_init_thread_affinity (attr, p);
    832   1.5  mrg 	}
    833   1.5  mrg 
    834   1.1  mrg       start_data->fn = fn;
    835   1.1  mrg       start_data->fn_data = data;
    836   1.1  mrg       start_data->ts.team = team;
    837   1.1  mrg       start_data->ts.work_share = &team->work_shares[0];
    838   1.1  mrg       start_data->ts.last_work_share = NULL;
    839   1.1  mrg       start_data->ts.team_id = i;
    840   1.1  mrg       start_data->ts.level = team->prev_ts.level + 1;
    841   1.1  mrg       start_data->ts.active_level = thr->ts.active_level;
    842  1.12  mrg       start_data->ts.def_allocator = thr->ts.def_allocator;
    843   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    844   1.1  mrg       start_data->ts.single_count = 0;
    845   1.1  mrg #endif
    846   1.1  mrg       start_data->ts.static_trip = 0;
    847  1.12  mrg       start_data->num_teams = thr->num_teams;
    848  1.12  mrg       start_data->team_num = thr->team_num;
    849   1.1  mrg       start_data->task = &team->implicit_task[i];
    850   1.1  mrg       gomp_init_task (start_data->task, task, icv);
    851   1.3  mrg       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    852   1.5  mrg       team->implicit_task[i].icv.bind_var = bind_var;
    853  1.10  mrg       start_data->task->taskgroup = taskgroup;
    854   1.1  mrg       start_data->thread_pool = pool;
    855   1.1  mrg       start_data->nested = nested;
    856   1.1  mrg 
    857   1.6  mrg       attr = gomp_adjust_thread_attr (attr, &thread_attr);
    858  1.10  mrg       err = pthread_create (&start_data->handle, attr, gomp_thread_start,
    859  1.10  mrg 			    start_data);
    860  1.10  mrg       start_data++;
    861   1.1  mrg       if (err != 0)
    862   1.1  mrg 	gomp_fatal ("Thread creation failed: %s", strerror (err));
    863   1.1  mrg     }
    864   1.1  mrg 
    865   1.6  mrg   if (__builtin_expect (attr == &thread_attr, 0))
    866   1.1  mrg     pthread_attr_destroy (&thread_attr);
    867   1.1  mrg 
    868   1.1  mrg  do_release:
    869   1.8  mrg   if (nested)
    870   1.8  mrg     gomp_barrier_wait (&team->barrier);
    871   1.8  mrg   else
    872   1.8  mrg     gomp_simple_barrier_wait (&pool->threads_dock);
    873   1.1  mrg 
    874   1.1  mrg   /* Decrease the barrier threshold to match the number of threads
    875   1.1  mrg      that should arrive back at the end of this team.  The extra
    876   1.1  mrg      threads should be exiting.  Note that we arrange for this test
    877   1.5  mrg      to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
    878   1.5  mrg      the barrier as well as gomp_managed_threads was temporarily
    879   1.5  mrg      set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
    880   1.5  mrg      AFFINITY_COUNT if non-zero will be always at least
    881   1.5  mrg      OLD_THREADS_COUNT - NTHREADS.  */
    882   1.5  mrg   if (__builtin_expect (nthreads < old_threads_used, 0)
    883   1.5  mrg       || __builtin_expect (affinity_count, 0))
    884   1.1  mrg     {
    885   1.1  mrg       long diff = (long) nthreads - (long) old_threads_used;
    886   1.1  mrg 
    887   1.5  mrg       if (affinity_count)
    888   1.5  mrg 	diff = -affinity_count;
    889   1.5  mrg 
    890   1.8  mrg       gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
    891   1.1  mrg 
    892   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    893   1.1  mrg       __sync_fetch_and_add (&gomp_managed_threads, diff);
    894   1.1  mrg #else
    895   1.5  mrg       gomp_mutex_lock (&gomp_managed_threads_lock);
    896   1.1  mrg       gomp_managed_threads += diff;
    897   1.5  mrg       gomp_mutex_unlock (&gomp_managed_threads_lock);
    898   1.1  mrg #endif
    899   1.1  mrg     }
    900  1.10  mrg   if (__builtin_expect (gomp_display_affinity_var, 0))
    901  1.10  mrg     {
    902  1.10  mrg       if (nested
    903  1.10  mrg 	  || nthreads != old_threads_used
    904  1.10  mrg 	  || force_display)
    905  1.10  mrg 	{
    906  1.10  mrg 	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
    907  1.10  mrg 					thr->place);
    908  1.10  mrg 	  if (nested)
    909  1.10  mrg 	    {
    910  1.10  mrg 	      start_data -= nthreads - 1;
    911  1.10  mrg 	      for (i = 1; i < nthreads; ++i)
    912  1.10  mrg 		{
    913  1.10  mrg 		  gomp_display_affinity_thread (
    914  1.10  mrg #ifdef LIBGOMP_USE_PTHREADS
    915  1.10  mrg 						start_data->handle,
    916  1.10  mrg #else
    917  1.10  mrg 						gomp_thread_self (),
    918  1.10  mrg #endif
    919  1.10  mrg 						&start_data->ts,
    920  1.10  mrg 						start_data->place);
    921  1.10  mrg 		  start_data++;
    922  1.10  mrg 		}
    923  1.10  mrg 	    }
    924  1.10  mrg 	  else
    925  1.10  mrg 	    {
    926  1.10  mrg 	      for (i = 1; i < nthreads; ++i)
    927  1.10  mrg 		{
    928  1.10  mrg 		  gomp_thread_handle handle
    929  1.10  mrg 		    = gomp_thread_to_pthread_t (pool->threads[i]);
    930  1.10  mrg 		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
    931  1.10  mrg 						pool->threads[i]->place);
    932  1.10  mrg 		}
    933  1.10  mrg 	    }
    934  1.10  mrg 	}
    935  1.10  mrg     }
    936   1.5  mrg   if (__builtin_expect (affinity_thr != NULL, 0)
    937   1.5  mrg       && team->prev_ts.place_partition_len > 64)
    938   1.5  mrg     free (affinity_thr);
    939   1.1  mrg }
    940   1.8  mrg #endif
    941   1.1  mrg 
    942   1.1  mrg 
    943   1.1  mrg /* Terminate the current team.  This is only to be called by the master
    944   1.1  mrg    thread.  We assume that we must wait for the other threads.  */
    945   1.1  mrg 
    946   1.1  mrg void
    947   1.1  mrg gomp_team_end (void)
    948   1.1  mrg {
    949   1.1  mrg   struct gomp_thread *thr = gomp_thread ();
    950   1.1  mrg   struct gomp_team *team = thr->ts.team;
    951   1.1  mrg 
    952   1.5  mrg   /* This barrier handles all pending explicit threads.
    953   1.5  mrg      As #pragma omp cancel parallel might get awaited count in
    954   1.5  mrg      team->barrier in a inconsistent state, we need to use a different
    955   1.5  mrg      counter here.  */
    956   1.5  mrg   gomp_team_barrier_wait_final (&team->barrier);
    957   1.5  mrg   if (__builtin_expect (team->team_cancelled, 0))
    958   1.5  mrg     {
    959   1.5  mrg       struct gomp_work_share *ws = team->work_shares_to_free;
    960   1.5  mrg       do
    961   1.5  mrg 	{
    962   1.5  mrg 	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
    963   1.5  mrg 	  if (next_ws == NULL)
    964   1.5  mrg 	    gomp_ptrlock_set (&ws->next_ws, ws);
    965   1.5  mrg 	  gomp_fini_work_share (ws);
    966   1.5  mrg 	  ws = next_ws;
    967   1.5  mrg 	}
    968   1.5  mrg       while (ws != NULL);
    969   1.5  mrg     }
    970   1.5  mrg   else
    971   1.5  mrg     gomp_fini_work_share (thr->ts.work_share);
    972   1.1  mrg 
    973   1.1  mrg   gomp_end_task ();
    974   1.1  mrg   thr->ts = team->prev_ts;
    975   1.1  mrg 
    976  1.10  mrg   if (__builtin_expect (thr->ts.level != 0, 0))
    977   1.1  mrg     {
    978   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    979   1.1  mrg       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
    980   1.1  mrg #else
    981   1.5  mrg       gomp_mutex_lock (&gomp_managed_threads_lock);
    982   1.1  mrg       gomp_managed_threads -= team->nthreads - 1L;
    983   1.5  mrg       gomp_mutex_unlock (&gomp_managed_threads_lock);
    984   1.1  mrg #endif
    985   1.1  mrg       /* This barrier has gomp_barrier_wait_last counterparts
    986   1.1  mrg 	 and ensures the team can be safely destroyed.  */
    987   1.1  mrg       gomp_barrier_wait (&team->barrier);
    988   1.1  mrg     }
    989   1.1  mrg 
    990   1.1  mrg   if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    991   1.1  mrg     {
    992   1.1  mrg       struct gomp_work_share *ws = team->work_shares[0].next_alloc;
    993   1.1  mrg       do
    994   1.1  mrg 	{
    995   1.1  mrg 	  struct gomp_work_share *next_ws = ws->next_alloc;
    996   1.1  mrg 	  free (ws);
    997   1.1  mrg 	  ws = next_ws;
    998   1.1  mrg 	}
    999   1.1  mrg       while (ws != NULL);
   1000   1.1  mrg     }
   1001   1.1  mrg   gomp_sem_destroy (&team->master_release);
   1002   1.1  mrg 
   1003   1.1  mrg   if (__builtin_expect (thr->ts.team != NULL, 0)
   1004   1.1  mrg       || __builtin_expect (team->nthreads == 1, 0))
   1005   1.1  mrg     free_team (team);
   1006   1.1  mrg   else
   1007   1.1  mrg     {
   1008   1.1  mrg       struct gomp_thread_pool *pool = thr->thread_pool;
   1009   1.1  mrg       if (pool->last_team)
   1010   1.1  mrg 	free_team (pool->last_team);
   1011   1.1  mrg       pool->last_team = team;
   1012   1.6  mrg       gomp_release_thread_pool (pool);
   1013   1.1  mrg     }
   1014   1.1  mrg }
   1015   1.1  mrg 
   1016   1.8  mrg #ifdef LIBGOMP_USE_PTHREADS
   1017   1.1  mrg 
   1018   1.1  mrg /* Constructors for this file.  */
   1019   1.1  mrg 
   1020   1.1  mrg static void __attribute__((constructor))
   1021   1.1  mrg initialize_team (void)
   1022   1.1  mrg {
   1023   1.5  mrg #if !defined HAVE_TLS && !defined USE_EMUTLS
   1024   1.1  mrg   static struct gomp_thread initial_thread_tls_data;
   1025   1.1  mrg 
   1026   1.1  mrg   pthread_key_create (&gomp_tls_key, NULL);
   1027   1.1  mrg   pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
   1028   1.1  mrg #endif
   1029   1.1  mrg 
   1030   1.1  mrg   if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
   1031   1.1  mrg     gomp_fatal ("could not create thread pool destructor.");
   1032   1.1  mrg }
   1033   1.1  mrg 
   1034   1.1  mrg static void __attribute__((destructor))
   1035   1.1  mrg team_destructor (void)
   1036   1.1  mrg {
   1037   1.1  mrg   /* Without this dlclose on libgomp could lead to subsequent
   1038   1.1  mrg      crashes.  */
   1039   1.1  mrg   pthread_key_delete (gomp_thread_destructor);
   1040   1.1  mrg }
   1041  1.10  mrg 
   1042  1.10  mrg /* Similar to gomp_free_pool_helper, but don't detach itself,
   1043  1.10  mrg    gomp_pause_host will pthread_join those threads.  */
   1044  1.10  mrg 
   1045  1.10  mrg static void
   1046  1.10  mrg gomp_pause_pool_helper (void *thread_pool)
   1047  1.10  mrg {
   1048  1.10  mrg   struct gomp_thread *thr = gomp_thread ();
   1049  1.10  mrg   struct gomp_thread_pool *pool
   1050  1.10  mrg     = (struct gomp_thread_pool *) thread_pool;
   1051  1.10  mrg   gomp_simple_barrier_wait_last (&pool->threads_dock);
   1052  1.10  mrg   gomp_sem_destroy (&thr->release);
   1053  1.10  mrg   thr->thread_pool = NULL;
   1054  1.10  mrg   thr->task = NULL;
   1055  1.10  mrg   pthread_exit (NULL);
   1056  1.10  mrg }
   1057  1.10  mrg 
   1058  1.10  mrg /* Free a thread pool and release its threads.  Return non-zero on
   1059  1.10  mrg    failure.  */
   1060  1.10  mrg 
   1061  1.10  mrg int
   1062  1.10  mrg gomp_pause_host (void)
   1063  1.10  mrg {
   1064  1.10  mrg   struct gomp_thread *thr = gomp_thread ();
   1065  1.10  mrg   struct gomp_thread_pool *pool = thr->thread_pool;
   1066  1.10  mrg   if (thr->ts.level)
   1067  1.10  mrg     return -1;
   1068  1.10  mrg   if (pool)
   1069  1.10  mrg     {
   1070  1.10  mrg       if (pool->threads_used > 0)
   1071  1.10  mrg 	{
   1072  1.10  mrg 	  int i;
   1073  1.10  mrg 	  pthread_t *thrs
   1074  1.10  mrg 	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
   1075  1.10  mrg 	  for (i = 1; i < pool->threads_used; i++)
   1076  1.10  mrg 	    {
   1077  1.10  mrg 	      struct gomp_thread *nthr = pool->threads[i];
   1078  1.10  mrg 	      nthr->fn = gomp_pause_pool_helper;
   1079  1.10  mrg 	      nthr->data = pool;
   1080  1.10  mrg 	      thrs[i] = gomp_thread_to_pthread_t (nthr);
   1081  1.10  mrg 	    }
   1082  1.10  mrg 	  /* This barrier undocks threads docked on pool->threads_dock.  */
   1083  1.10  mrg 	  gomp_simple_barrier_wait (&pool->threads_dock);
   1084  1.10  mrg 	  /* And this waits till all threads have called gomp_barrier_wait_last
   1085  1.10  mrg 	     in gomp_pause_pool_helper.  */
   1086  1.10  mrg 	  gomp_simple_barrier_wait (&pool->threads_dock);
   1087  1.10  mrg 	  /* Now it is safe to destroy the barrier and free the pool.  */
   1088  1.10  mrg 	  gomp_simple_barrier_destroy (&pool->threads_dock);
   1089  1.10  mrg 
   1090  1.10  mrg #ifdef HAVE_SYNC_BUILTINS
   1091  1.10  mrg 	  __sync_fetch_and_add (&gomp_managed_threads,
   1092  1.10  mrg 				1L - pool->threads_used);
   1093  1.10  mrg #else
   1094  1.10  mrg 	  gomp_mutex_lock (&gomp_managed_threads_lock);
   1095  1.10  mrg 	  gomp_managed_threads -= pool->threads_used - 1L;
   1096  1.10  mrg 	  gomp_mutex_unlock (&gomp_managed_threads_lock);
   1097  1.10  mrg #endif
   1098  1.10  mrg 	  for (i = 1; i < pool->threads_used; i++)
   1099  1.10  mrg 	    pthread_join (thrs[i], NULL);
   1100  1.10  mrg 	}
   1101  1.10  mrg       if (pool->last_team)
   1102  1.10  mrg 	free_team (pool->last_team);
   1103  1.10  mrg #ifndef __nvptx__
   1104  1.11  mrg       team_free (pool->threads);
   1105  1.11  mrg       team_free (pool);
   1106  1.10  mrg #endif
   1107  1.10  mrg       thr->thread_pool = NULL;
   1108  1.10  mrg     }
   1109  1.10  mrg   return 0;
   1110  1.10  mrg }
   1111   1.8  mrg #endif
   1112   1.1  mrg 
   1113   1.1  mrg struct gomp_task_icv *
   1114   1.1  mrg gomp_new_icv (void)
   1115   1.1  mrg {
   1116   1.1  mrg   struct gomp_thread *thr = gomp_thread ();
   1117   1.1  mrg   struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
   1118   1.1  mrg   gomp_init_task (task, NULL, &gomp_global_icv);
   1119   1.1  mrg   thr->task = task;
   1120   1.8  mrg #ifdef LIBGOMP_USE_PTHREADS
   1121   1.1  mrg   pthread_setspecific (gomp_thread_destructor, thr);
   1122   1.8  mrg #endif
   1123   1.1  mrg   return &task->icv;
   1124   1.1  mrg }
   1125