Home | History | Annotate | Line # | Download | only in libgomp
team.c revision 1.11
      1  1.11  mrg /* Copyright (C) 2005-2020 Free Software Foundation, Inc.
      2   1.1  mrg    Contributed by Richard Henderson <rth (at) redhat.com>.
      3   1.1  mrg 
      4   1.5  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5   1.5  mrg    (libgomp).
      6   1.1  mrg 
      7   1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8   1.1  mrg    under the terms of the GNU General Public License as published by
      9   1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10   1.1  mrg    any later version.
     11   1.1  mrg 
     12   1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13   1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14   1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15   1.1  mrg    more details.
     16   1.1  mrg 
     17   1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18   1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19   1.1  mrg    3.1, as published by the Free Software Foundation.
     20   1.1  mrg 
     21   1.1  mrg    You should have received a copy of the GNU General Public License and
     22   1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23   1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24   1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25   1.1  mrg 
     26  1.11  mrg /* This file handles the maintenance of threads in response to team
     27   1.1  mrg    creation and termination.  */
     28   1.1  mrg 
     29   1.1  mrg #include "libgomp.h"
     30   1.6  mrg #include "pool.h"
     31   1.1  mrg #include <stdlib.h>
     32   1.1  mrg #include <string.h>
     33   1.1  mrg 
#ifdef LIBGOMP_USE_PTHREADS
/* Attributes (stack size etc.) applied to every thread libgomp creates.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
/* Fallback when compiler-level TLS is unavailable: the per-thread
   structure is reached through a pthreads key instead.  */
pthread_key_t gomp_tls_key;
#endif

     48   1.1  mrg 
/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);		/* Parallel region body the thread runs.  */
  void *fn_data;		/* Argument passed to FN.  */
  struct gomp_team_state ts;	/* Initial team state for the thread.  */
  struct gomp_task *task;	/* The thread's implicit task.  */
  struct gomp_thread_pool *thread_pool;	/* Pool the thread docks in.  */
  unsigned int place;		/* Affinity place; copied to thr->place.  */
  bool nested;			/* True when spawning for a nested team.  */
  pthread_t handle;		/* pthread id of the new thread (used only
				   when GOMP_NEEDS_THREAD_HANDLE).  */
};
     62   1.1  mrg 
     63   1.1  mrg 
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  /* No compiler TLS: keep the per-thread structure on this thread's own
     stack and publish it through the pthreads key.  */
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  NOTE(review): DATA presumably lives
     in the creating thread's frame and must not be touched after the
     first barrier below releases the creator — confirm against
     gomp_team_start.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  /* Register this thread's release semaphore so ordered sections can
     signal it by team id.  */
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      /* Rendezvous with the rest of the nested team before running.  */
      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      /* Last wait lets the team creator know it is safe to free TEAM;
	 nested threads do not return to a dock, they exit below.  */
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Non-nested: become a reusable pool thread.  Publish ourselves,
	 then alternate between running a region and docking on the pool
	 barrier until woken with a NULL fn.  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  /* Dock until the master hands out the next region (or asks us
	     to die by leaving thr->fn NULL).  */
	  gomp_simple_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  /* Thread is exiting: release its resources and detach so no join is
     ever required.  */
  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif
    142   1.8  mrg #endif
    143   1.1  mrg 
    144   1.6  mrg static inline struct gomp_team *
    145   1.6  mrg get_last_team (unsigned nthreads)
    146   1.6  mrg {
    147   1.6  mrg   struct gomp_thread *thr = gomp_thread ();
    148   1.6  mrg   if (thr->ts.team == NULL)
    149   1.6  mrg     {
    150   1.6  mrg       struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
    151   1.6  mrg       struct gomp_team *last_team = pool->last_team;
    152   1.6  mrg       if (last_team != NULL && last_team->nthreads == nthreads)
    153   1.6  mrg         {
    154   1.6  mrg           pool->last_team = NULL;
    155   1.6  mrg           return last_team;
    156   1.6  mrg         }
    157   1.6  mrg     }
    158   1.6  mrg   return NULL;
    159   1.6  mrg }
    160   1.1  mrg 
/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  /* Prefer recycling the pool's cached team of the same size; otherwise
     allocate a fresh one and do the one-time initialization.  */
  team = get_last_team (nthreads);
  if (team == NULL)
    {
      /* The team is allocated with two trailing variable-length arrays:
	 implicit_task[nthreads] followed by ordered_release[nthreads]
	 (see the ordered_release assignment below).  */
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
      team = team_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  /* Per-use (re)initialization — runs for recycled teams too.  */
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  /* Chain the remaining 7 inline work shares (indices 1..7 of the
     8-entry inline array) into the allocation free list; note I is 7
     after the loop, terminating the chain.  */
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  /* ordered_release[] starts right after implicit_task[nthreads] in the
     trailing storage allocated above.  */
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
    211   1.1  mrg 
    212   1.1  mrg 
    213   1.1  mrg /* Free a team data structure.  */
    214   1.1  mrg 
    215   1.1  mrg static void
    216   1.1  mrg free_team (struct gomp_team *team)
    217   1.1  mrg {
    218   1.6  mrg #ifndef HAVE_SYNC_BUILTINS
    219   1.6  mrg   gomp_mutex_destroy (&team->work_share_list_free_lock);
    220   1.6  mrg #endif
    221   1.1  mrg   gomp_barrier_destroy (&team->barrier);
    222   1.1  mrg   gomp_mutex_destroy (&team->task_lock);
    223   1.6  mrg   priority_queue_free (&team->task_queue);
    224  1.11  mrg   team_free (team);
    225   1.1  mrg }
    226   1.1  mrg 
/* Run by each docked pool thread when the pool is being destroyed
   (installed as thr->fn by gomp_free_thread); releases the thread's
   resources and terminates the calling thread.  Never returns.  */
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  /* Tell gomp_free_thread (waiting on the same dock barrier) that this
     thread has arrived and will no longer touch the pool.  */
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  /* Offload targets have no pthreads; terminate via target asm.  */
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}
    249   1.1  mrg 
/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  /* Ask every docked pool thread (all but ourselves, index 0)
	     to run the self-destruct helper next time it wakes.  */
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

	  /* Account for the dying threads in the global thread count.  */
#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  /* If the thread dies while still at top level inside a team, close the
     team out before tearing down the implicit task.  */
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
    302   1.1  mrg 
    303   1.1  mrg /* Launch a team.  */
    304   1.1  mrg 
    305   1.8  mrg #ifdef LIBGOMP_USE_PTHREADS
    306   1.1  mrg void
    307   1.1  mrg gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    308  1.10  mrg 		 unsigned flags, struct gomp_team *team,
    309  1.10  mrg 		 struct gomp_taskgroup *taskgroup)
    310   1.1  mrg {
    311   1.1  mrg   struct gomp_thread_start_data *start_data;
    312   1.1  mrg   struct gomp_thread *thr, *nthr;
    313   1.1  mrg   struct gomp_task *task;
    314   1.1  mrg   struct gomp_task_icv *icv;
    315   1.1  mrg   bool nested;
    316   1.1  mrg   struct gomp_thread_pool *pool;
    317   1.1  mrg   unsigned i, n, old_threads_used = 0;
    318   1.1  mrg   pthread_attr_t thread_attr, *attr;
    319   1.3  mrg   unsigned long nthreads_var;
    320   1.5  mrg   char bind, bind_var;
    321   1.5  mrg   unsigned int s = 0, rest = 0, p = 0, k = 0;
    322   1.5  mrg   unsigned int affinity_count = 0;
    323   1.5  mrg   struct gomp_thread **affinity_thr = NULL;
    324  1.10  mrg   bool force_display = false;
    325   1.1  mrg 
    326   1.1  mrg   thr = gomp_thread ();
    327   1.6  mrg   nested = thr->ts.level;
    328   1.1  mrg   pool = thr->thread_pool;
    329   1.1  mrg   task = thr->task;
    330   1.1  mrg   icv = task ? &task->icv : &gomp_global_icv;
    331   1.5  mrg   if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    332  1.10  mrg     {
    333  1.10  mrg       gomp_init_affinity ();
    334  1.10  mrg       if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
    335  1.10  mrg 	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
    336  1.10  mrg 				      thr->place);
    337  1.10  mrg     }
    338   1.1  mrg 
    339   1.1  mrg   /* Always save the previous state, even if this isn't a nested team.
    340   1.1  mrg      In particular, we should save any work share state from an outer
    341   1.1  mrg      orphaned work share construct.  */
    342   1.1  mrg   team->prev_ts = thr->ts;
    343   1.1  mrg 
    344   1.1  mrg   thr->ts.team = team;
    345   1.1  mrg   thr->ts.team_id = 0;
    346   1.1  mrg   ++thr->ts.level;
    347   1.1  mrg   if (nthreads > 1)
    348   1.1  mrg     ++thr->ts.active_level;
    349   1.1  mrg   thr->ts.work_share = &team->work_shares[0];
    350   1.1  mrg   thr->ts.last_work_share = NULL;
    351   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    352   1.1  mrg   thr->ts.single_count = 0;
    353   1.1  mrg #endif
    354   1.1  mrg   thr->ts.static_trip = 0;
    355   1.1  mrg   thr->task = &team->implicit_task[0];
    356  1.10  mrg #ifdef GOMP_NEEDS_THREAD_HANDLE
    357  1.10  mrg   thr->handle = pthread_self ();
    358  1.10  mrg #endif
    359   1.3  mrg   nthreads_var = icv->nthreads_var;
    360   1.3  mrg   if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
    361   1.3  mrg       && thr->ts.level < gomp_nthreads_var_list_len)
    362   1.3  mrg     nthreads_var = gomp_nthreads_var_list[thr->ts.level];
    363   1.5  mrg   bind_var = icv->bind_var;
    364   1.5  mrg   if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    365   1.5  mrg     bind_var = flags & 7;
    366   1.5  mrg   bind = bind_var;
    367   1.5  mrg   if (__builtin_expect (gomp_bind_var_list != NULL, 0)
    368   1.5  mrg       && thr->ts.level < gomp_bind_var_list_len)
    369   1.5  mrg     bind_var = gomp_bind_var_list[thr->ts.level];
    370   1.1  mrg   gomp_init_task (thr->task, task, icv);
    371  1.10  mrg   thr->task->taskgroup = taskgroup;
    372   1.3  mrg   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    373   1.5  mrg   team->implicit_task[0].icv.bind_var = bind_var;
    374   1.1  mrg 
    375   1.1  mrg   if (nthreads == 1)
    376   1.1  mrg     return;
    377   1.1  mrg 
    378   1.1  mrg   i = 1;
    379   1.1  mrg 
    380   1.5  mrg   if (__builtin_expect (gomp_places_list != NULL, 0))
    381   1.5  mrg     {
    382   1.5  mrg       /* Depending on chosen proc_bind model, set subpartition
    383   1.5  mrg 	 for the master thread and initialize helper variables
    384   1.5  mrg 	 P and optionally S, K and/or REST used by later place
    385   1.5  mrg 	 computation for each additional thread.  */
    386   1.5  mrg       p = thr->place - 1;
    387   1.5  mrg       switch (bind)
    388   1.5  mrg 	{
    389   1.5  mrg 	case omp_proc_bind_true:
    390   1.5  mrg 	case omp_proc_bind_close:
    391   1.5  mrg 	  if (nthreads > thr->ts.place_partition_len)
    392   1.5  mrg 	    {
    393   1.5  mrg 	      /* T > P.  S threads will be placed in each place,
    394   1.5  mrg 		 and the final REM threads placed one by one
    395   1.5  mrg 		 into the already occupied places.  */
    396   1.5  mrg 	      s = nthreads / thr->ts.place_partition_len;
    397   1.5  mrg 	      rest = nthreads % thr->ts.place_partition_len;
    398   1.5  mrg 	    }
    399   1.5  mrg 	  else
    400   1.5  mrg 	    s = 1;
    401   1.5  mrg 	  k = 1;
    402   1.5  mrg 	  break;
    403   1.5  mrg 	case omp_proc_bind_master:
    404   1.5  mrg 	  /* Each thread will be bound to master's place.  */
    405   1.5  mrg 	  break;
    406   1.5  mrg 	case omp_proc_bind_spread:
    407   1.5  mrg 	  if (nthreads <= thr->ts.place_partition_len)
    408   1.5  mrg 	    {
    409   1.5  mrg 	      /* T <= P.  Each subpartition will have in between s
    410   1.5  mrg 		 and s+1 places (subpartitions starting at or
    411   1.5  mrg 		 after rest will have s places, earlier s+1 places),
    412   1.5  mrg 		 each thread will be bound to the first place in
    413   1.5  mrg 		 its subpartition (except for the master thread
    414   1.5  mrg 		 that can be bound to another place in its
    415   1.5  mrg 		 subpartition).  */
    416   1.5  mrg 	      s = thr->ts.place_partition_len / nthreads;
    417   1.5  mrg 	      rest = thr->ts.place_partition_len % nthreads;
    418   1.5  mrg 	      rest = (s + 1) * rest + thr->ts.place_partition_off;
    419   1.5  mrg 	      if (p < rest)
    420   1.5  mrg 		{
    421   1.5  mrg 		  p -= (p - thr->ts.place_partition_off) % (s + 1);
    422   1.5  mrg 		  thr->ts.place_partition_len = s + 1;
    423   1.5  mrg 		}
    424   1.5  mrg 	      else
    425   1.5  mrg 		{
    426   1.5  mrg 		  p -= (p - rest) % s;
    427   1.5  mrg 		  thr->ts.place_partition_len = s;
    428   1.5  mrg 		}
    429   1.5  mrg 	      thr->ts.place_partition_off = p;
    430   1.5  mrg 	    }
    431   1.5  mrg 	  else
    432   1.5  mrg 	    {
    433   1.5  mrg 	      /* T > P.  Each subpartition will have just a single
    434   1.5  mrg 		 place and we'll place between s and s+1
    435   1.5  mrg 		 threads into each subpartition.  */
    436   1.5  mrg 	      s = nthreads / thr->ts.place_partition_len;
    437   1.5  mrg 	      rest = nthreads % thr->ts.place_partition_len;
    438   1.5  mrg 	      thr->ts.place_partition_off = p;
    439   1.5  mrg 	      thr->ts.place_partition_len = 1;
    440   1.5  mrg 	      k = 1;
    441   1.5  mrg 	    }
    442   1.5  mrg 	  break;
    443   1.5  mrg 	}
    444   1.5  mrg     }
    445   1.5  mrg   else
    446   1.5  mrg     bind = omp_proc_bind_false;
    447   1.5  mrg 
    448   1.1  mrg   /* We only allow the reuse of idle threads for non-nested PARALLEL
    449   1.1  mrg      regions.  This appears to be implied by the semantics of
    450   1.1  mrg      threadprivate variables, but perhaps that's reading too much into
    451   1.1  mrg      things.  Certainly it does prevent any locking problems, since
    452   1.1  mrg      only the initial program thread will modify gomp_threads.  */
    453   1.1  mrg   if (!nested)
    454   1.1  mrg     {
    455   1.1  mrg       old_threads_used = pool->threads_used;
    456   1.1  mrg 
    457   1.1  mrg       if (nthreads <= old_threads_used)
    458   1.1  mrg 	n = nthreads;
    459   1.1  mrg       else if (old_threads_used == 0)
    460   1.1  mrg 	{
    461   1.1  mrg 	  n = 0;
    462   1.8  mrg 	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
    463   1.1  mrg 	}
    464   1.1  mrg       else
    465   1.1  mrg 	{
    466   1.1  mrg 	  n = old_threads_used;
    467   1.1  mrg 
    468   1.1  mrg 	  /* Increase the barrier threshold to make sure all new
    469   1.1  mrg 	     threads arrive before the team is released.  */
    470   1.8  mrg 	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
    471   1.1  mrg 	}
    472   1.1  mrg 
    473   1.1  mrg       /* Not true yet, but soon will be.  We're going to release all
    474   1.1  mrg 	 threads from the dock, and those that aren't part of the
    475   1.1  mrg 	 team will exit.  */
    476   1.1  mrg       pool->threads_used = nthreads;
    477   1.1  mrg 
    478   1.5  mrg       /* If necessary, expand the size of the gomp_threads array.  It is
    479   1.5  mrg 	 expected that changes in the number of threads are rare, thus we
    480   1.5  mrg 	 make no effort to expand gomp_threads_size geometrically.  */
    481   1.5  mrg       if (nthreads >= pool->threads_size)
    482   1.5  mrg 	{
    483   1.5  mrg 	  pool->threads_size = nthreads + 1;
    484   1.5  mrg 	  pool->threads
    485   1.5  mrg 	    = gomp_realloc (pool->threads,
    486   1.5  mrg 			    pool->threads_size
    487  1.10  mrg 			    * sizeof (struct gomp_thread *));
    488  1.10  mrg 	  /* Add current (master) thread to threads[].  */
    489  1.10  mrg 	  pool->threads[0] = thr;
    490   1.5  mrg 	}
    491   1.5  mrg 
    492   1.1  mrg       /* Release existing idle threads.  */
    493   1.1  mrg       for (; i < n; ++i)
    494   1.1  mrg 	{
    495   1.5  mrg 	  unsigned int place_partition_off = thr->ts.place_partition_off;
    496   1.5  mrg 	  unsigned int place_partition_len = thr->ts.place_partition_len;
    497   1.5  mrg 	  unsigned int place = 0;
    498   1.5  mrg 	  if (__builtin_expect (gomp_places_list != NULL, 0))
    499   1.5  mrg 	    {
    500   1.5  mrg 	      switch (bind)
    501   1.5  mrg 		{
    502   1.5  mrg 		case omp_proc_bind_true:
    503   1.5  mrg 		case omp_proc_bind_close:
    504   1.5  mrg 		  if (k == s)
    505   1.5  mrg 		    {
    506   1.5  mrg 		      ++p;
    507   1.5  mrg 		      if (p == (team->prev_ts.place_partition_off
    508   1.5  mrg 				+ team->prev_ts.place_partition_len))
    509   1.5  mrg 			p = team->prev_ts.place_partition_off;
    510   1.5  mrg 		      k = 1;
    511   1.5  mrg 		      if (i == nthreads - rest)
    512   1.5  mrg 			s = 1;
    513   1.5  mrg 		    }
    514   1.5  mrg 		  else
    515   1.5  mrg 		    ++k;
    516   1.5  mrg 		  break;
    517   1.5  mrg 		case omp_proc_bind_master:
    518   1.5  mrg 		  break;
    519   1.5  mrg 		case omp_proc_bind_spread:
    520   1.5  mrg 		  if (k == 0)
    521   1.5  mrg 		    {
    522   1.5  mrg 		      /* T <= P.  */
    523   1.5  mrg 		      if (p < rest)
    524   1.5  mrg 			p += s + 1;
    525   1.5  mrg 		      else
    526   1.5  mrg 			p += s;
    527   1.5  mrg 		      if (p == (team->prev_ts.place_partition_off
    528   1.5  mrg 				+ team->prev_ts.place_partition_len))
    529   1.5  mrg 			p = team->prev_ts.place_partition_off;
    530   1.5  mrg 		      place_partition_off = p;
    531   1.5  mrg 		      if (p < rest)
    532   1.5  mrg 			place_partition_len = s + 1;
    533   1.5  mrg 		      else
    534   1.5  mrg 			place_partition_len = s;
    535   1.5  mrg 		    }
    536   1.5  mrg 		  else
    537   1.5  mrg 		    {
    538   1.5  mrg 		      /* T > P.  */
    539   1.5  mrg 		      if (k == s)
    540   1.5  mrg 			{
    541   1.5  mrg 			  ++p;
    542   1.5  mrg 			  if (p == (team->prev_ts.place_partition_off
    543   1.5  mrg 				    + team->prev_ts.place_partition_len))
    544   1.5  mrg 			    p = team->prev_ts.place_partition_off;
    545   1.5  mrg 			  k = 1;
    546   1.5  mrg 			  if (i == nthreads - rest)
    547   1.5  mrg 			    s = 1;
    548   1.5  mrg 			}
    549   1.5  mrg 		      else
    550   1.5  mrg 			++k;
    551   1.5  mrg 		      place_partition_off = p;
    552   1.5  mrg 		      place_partition_len = 1;
    553   1.5  mrg 		    }
    554   1.5  mrg 		  break;
    555   1.5  mrg 		}
    556   1.5  mrg 	      if (affinity_thr != NULL
    557   1.5  mrg 		  || (bind != omp_proc_bind_true
    558   1.5  mrg 		      && pool->threads[i]->place != p + 1)
    559   1.5  mrg 		  || pool->threads[i]->place <= place_partition_off
    560   1.5  mrg 		  || pool->threads[i]->place > (place_partition_off
    561   1.5  mrg 						+ place_partition_len))
    562   1.5  mrg 		{
    563   1.5  mrg 		  unsigned int l;
    564  1.10  mrg 		  force_display = true;
    565   1.5  mrg 		  if (affinity_thr == NULL)
    566   1.5  mrg 		    {
    567   1.5  mrg 		      unsigned int j;
    568   1.5  mrg 
    569   1.5  mrg 		      if (team->prev_ts.place_partition_len > 64)
    570   1.5  mrg 			affinity_thr
    571   1.5  mrg 			  = gomp_malloc (team->prev_ts.place_partition_len
    572   1.5  mrg 					 * sizeof (struct gomp_thread *));
    573   1.5  mrg 		      else
    574   1.5  mrg 			affinity_thr
    575   1.5  mrg 			  = gomp_alloca (team->prev_ts.place_partition_len
    576   1.5  mrg 					 * sizeof (struct gomp_thread *));
    577   1.5  mrg 		      memset (affinity_thr, '\0',
    578   1.5  mrg 			      team->prev_ts.place_partition_len
    579   1.5  mrg 			      * sizeof (struct gomp_thread *));
    580   1.5  mrg 		      for (j = i; j < old_threads_used; j++)
    581   1.5  mrg 			{
    582   1.5  mrg 			  if (pool->threads[j]->place
    583   1.5  mrg 			      > team->prev_ts.place_partition_off
    584   1.5  mrg 			      && (pool->threads[j]->place
    585   1.5  mrg 				  <= (team->prev_ts.place_partition_off
    586   1.5  mrg 				      + team->prev_ts.place_partition_len)))
    587   1.5  mrg 			    {
    588   1.5  mrg 			      l = pool->threads[j]->place - 1
    589   1.5  mrg 				  - team->prev_ts.place_partition_off;
    590   1.5  mrg 			      pool->threads[j]->data = affinity_thr[l];
    591   1.5  mrg 			      affinity_thr[l] = pool->threads[j];
    592   1.5  mrg 			    }
    593   1.5  mrg 			  pool->threads[j] = NULL;
    594   1.5  mrg 			}
    595   1.5  mrg 		      if (nthreads > old_threads_used)
    596   1.5  mrg 			memset (&pool->threads[old_threads_used],
    597   1.5  mrg 				'\0', ((nthreads - old_threads_used)
    598   1.5  mrg 				       * sizeof (struct gomp_thread *)));
    599   1.5  mrg 		      n = nthreads;
    600   1.5  mrg 		      affinity_count = old_threads_used - i;
    601   1.5  mrg 		    }
    602   1.5  mrg 		  if (affinity_count == 0)
    603   1.5  mrg 		    break;
    604   1.5  mrg 		  l = p;
    605   1.5  mrg 		  if (affinity_thr[l - team->prev_ts.place_partition_off]
    606   1.5  mrg 		      == NULL)
    607   1.5  mrg 		    {
    608   1.5  mrg 		      if (bind != omp_proc_bind_true)
    609   1.5  mrg 			continue;
    610   1.5  mrg 		      for (l = place_partition_off;
    611   1.5  mrg 			   l < place_partition_off + place_partition_len;
    612   1.5  mrg 			   l++)
    613   1.5  mrg 			if (affinity_thr[l - team->prev_ts.place_partition_off]
    614   1.5  mrg 			    != NULL)
    615   1.5  mrg 			  break;
    616   1.5  mrg 		      if (l == place_partition_off + place_partition_len)
    617   1.5  mrg 			continue;
    618   1.5  mrg 		    }
    619   1.5  mrg 		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
    620   1.5  mrg 		  affinity_thr[l - team->prev_ts.place_partition_off]
    621   1.5  mrg 		    = (struct gomp_thread *) nthr->data;
    622   1.5  mrg 		  affinity_count--;
    623   1.5  mrg 		  pool->threads[i] = nthr;
    624   1.5  mrg 		}
    625   1.5  mrg 	      else
    626   1.5  mrg 		nthr = pool->threads[i];
    627   1.5  mrg 	      place = p + 1;
    628   1.5  mrg 	    }
    629   1.5  mrg 	  else
    630   1.5  mrg 	    nthr = pool->threads[i];
    631   1.1  mrg 	  nthr->ts.team = team;
    632   1.1  mrg 	  nthr->ts.work_share = &team->work_shares[0];
    633   1.1  mrg 	  nthr->ts.last_work_share = NULL;
    634   1.1  mrg 	  nthr->ts.team_id = i;
    635   1.1  mrg 	  nthr->ts.level = team->prev_ts.level + 1;
    636   1.1  mrg 	  nthr->ts.active_level = thr->ts.active_level;
    637   1.5  mrg 	  nthr->ts.place_partition_off = place_partition_off;
    638   1.5  mrg 	  nthr->ts.place_partition_len = place_partition_len;
    639   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    640   1.1  mrg 	  nthr->ts.single_count = 0;
    641   1.1  mrg #endif
    642   1.1  mrg 	  nthr->ts.static_trip = 0;
    643   1.1  mrg 	  nthr->task = &team->implicit_task[i];
    644   1.5  mrg 	  nthr->place = place;
    645   1.1  mrg 	  gomp_init_task (nthr->task, task, icv);
    646   1.3  mrg 	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
    647   1.5  mrg 	  team->implicit_task[i].icv.bind_var = bind_var;
    648  1.10  mrg 	  nthr->task->taskgroup = taskgroup;
    649   1.1  mrg 	  nthr->fn = fn;
    650   1.1  mrg 	  nthr->data = data;
    651   1.1  mrg 	  team->ordered_release[i] = &nthr->release;
    652   1.1  mrg 	}
    653   1.1  mrg 
    654   1.5  mrg       if (__builtin_expect (affinity_thr != NULL, 0))
    655   1.5  mrg 	{
    656   1.5  mrg 	  /* If AFFINITY_THR is non-NULL just because we had to
    657   1.5  mrg 	     permute some threads in the pool, but we've managed
    658   1.5  mrg 	     to find exactly as many old threads as we'd find
    659   1.5  mrg 	     without affinity, we don't need to handle this
    660   1.5  mrg 	     specially anymore.  */
    661   1.5  mrg 	  if (nthreads <= old_threads_used
    662   1.5  mrg 	      ? (affinity_count == old_threads_used - nthreads)
    663   1.5  mrg 	      : (i == old_threads_used))
    664   1.5  mrg 	    {
    665   1.5  mrg 	      if (team->prev_ts.place_partition_len > 64)
    666   1.5  mrg 		free (affinity_thr);
    667   1.5  mrg 	      affinity_thr = NULL;
    668   1.5  mrg 	      affinity_count = 0;
    669   1.5  mrg 	    }
    670   1.5  mrg 	  else
    671   1.5  mrg 	    {
    672   1.5  mrg 	      i = 1;
    673   1.5  mrg 	      /* We are going to compute the places/subpartitions
    674   1.5  mrg 		 again from the beginning.  So, we need to reinitialize
    675   1.5  mrg 		 vars modified by the switch (bind) above inside
    676   1.5  mrg 		 of the loop, to the state they had after the initial
    677   1.5  mrg 		 switch (bind).  */
    678   1.5  mrg 	      switch (bind)
    679   1.5  mrg 		{
    680   1.5  mrg 		case omp_proc_bind_true:
    681   1.5  mrg 		case omp_proc_bind_close:
    682   1.5  mrg 		  if (nthreads > thr->ts.place_partition_len)
    683   1.5  mrg 		    /* T > P.  S has been changed, so needs
    684   1.5  mrg 		       to be recomputed.  */
    685   1.5  mrg 		    s = nthreads / thr->ts.place_partition_len;
    686   1.5  mrg 		  k = 1;
    687   1.5  mrg 		  p = thr->place - 1;
    688   1.5  mrg 		  break;
    689   1.5  mrg 		case omp_proc_bind_master:
    690   1.5  mrg 		  /* No vars have been changed.  */
    691   1.5  mrg 		  break;
    692   1.5  mrg 		case omp_proc_bind_spread:
    693   1.5  mrg 		  p = thr->ts.place_partition_off;
    694   1.5  mrg 		  if (k != 0)
    695   1.5  mrg 		    {
    696   1.5  mrg 		      /* T > P.  */
    697   1.5  mrg 		      s = nthreads / team->prev_ts.place_partition_len;
    698   1.5  mrg 		      k = 1;
    699   1.5  mrg 		    }
    700   1.5  mrg 		  break;
    701   1.5  mrg 		}
    702   1.5  mrg 
    703   1.5  mrg 	      /* Increase the barrier threshold to make sure all new
    704   1.5  mrg 		 threads and all the threads we're going to let die
    705   1.5  mrg 		 arrive before the team is released.  */
    706   1.5  mrg 	      if (affinity_count)
    707   1.8  mrg 		gomp_simple_barrier_reinit (&pool->threads_dock,
    708   1.8  mrg 					    nthreads + affinity_count);
    709   1.5  mrg 	    }
    710   1.5  mrg 	}
    711   1.5  mrg 
    712   1.1  mrg       if (i == nthreads)
    713   1.1  mrg 	goto do_release;
    714   1.1  mrg 
    715   1.1  mrg     }
    716   1.1  mrg 
    717   1.5  mrg   if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    718   1.1  mrg     {
    719   1.5  mrg       long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
    720   1.1  mrg 
    721   1.1  mrg       if (old_threads_used == 0)
    722   1.1  mrg 	--diff;
    723   1.1  mrg 
    724   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    725   1.1  mrg       __sync_fetch_and_add (&gomp_managed_threads, diff);
    726   1.1  mrg #else
    727   1.5  mrg       gomp_mutex_lock (&gomp_managed_threads_lock);
    728   1.1  mrg       gomp_managed_threads += diff;
    729   1.5  mrg       gomp_mutex_unlock (&gomp_managed_threads_lock);
    730   1.1  mrg #endif
    731   1.1  mrg     }
    732   1.1  mrg 
    733   1.1  mrg   attr = &gomp_thread_attr;
    734   1.5  mrg   if (__builtin_expect (gomp_places_list != NULL, 0))
    735   1.1  mrg     {
    736   1.1  mrg       size_t stacksize;
    737   1.1  mrg       pthread_attr_init (&thread_attr);
    738   1.1  mrg       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
    739   1.1  mrg 	pthread_attr_setstacksize (&thread_attr, stacksize);
    740   1.1  mrg       attr = &thread_attr;
    741   1.1  mrg     }
    742   1.1  mrg 
    743   1.1  mrg   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
    744  1.10  mrg 			    * (nthreads - i));
    745   1.1  mrg 
    746   1.1  mrg   /* Launch new threads.  */
    747   1.5  mrg   for (; i < nthreads; ++i)
    748   1.1  mrg     {
    749   1.1  mrg       int err;
    750   1.1  mrg 
    751   1.5  mrg       start_data->ts.place_partition_off = thr->ts.place_partition_off;
    752   1.5  mrg       start_data->ts.place_partition_len = thr->ts.place_partition_len;
    753   1.5  mrg       start_data->place = 0;
    754   1.5  mrg       if (__builtin_expect (gomp_places_list != NULL, 0))
    755   1.5  mrg 	{
    756   1.5  mrg 	  switch (bind)
    757   1.5  mrg 	    {
    758   1.5  mrg 	    case omp_proc_bind_true:
    759   1.5  mrg 	    case omp_proc_bind_close:
    760   1.5  mrg 	      if (k == s)
    761   1.5  mrg 		{
    762   1.5  mrg 		  ++p;
    763   1.5  mrg 		  if (p == (team->prev_ts.place_partition_off
    764   1.5  mrg 			    + team->prev_ts.place_partition_len))
    765   1.5  mrg 		    p = team->prev_ts.place_partition_off;
    766   1.5  mrg 		  k = 1;
    767   1.5  mrg 		  if (i == nthreads - rest)
    768   1.5  mrg 		    s = 1;
    769   1.5  mrg 		}
    770   1.5  mrg 	      else
    771   1.5  mrg 		++k;
    772   1.5  mrg 	      break;
    773   1.5  mrg 	    case omp_proc_bind_master:
    774   1.5  mrg 	      break;
    775   1.5  mrg 	    case omp_proc_bind_spread:
    776   1.5  mrg 	      if (k == 0)
    777   1.5  mrg 		{
    778   1.5  mrg 		  /* T <= P.  */
    779   1.5  mrg 		  if (p < rest)
    780   1.5  mrg 		    p += s + 1;
    781   1.5  mrg 		  else
    782   1.5  mrg 		    p += s;
    783   1.5  mrg 		  if (p == (team->prev_ts.place_partition_off
    784   1.5  mrg 			    + team->prev_ts.place_partition_len))
    785   1.5  mrg 		    p = team->prev_ts.place_partition_off;
    786   1.5  mrg 		  start_data->ts.place_partition_off = p;
    787   1.5  mrg 		  if (p < rest)
    788   1.5  mrg 		    start_data->ts.place_partition_len = s + 1;
    789   1.5  mrg 		  else
    790   1.5  mrg 		    start_data->ts.place_partition_len = s;
    791   1.5  mrg 		}
    792   1.5  mrg 	      else
    793   1.5  mrg 		{
    794   1.5  mrg 		  /* T > P.  */
    795   1.5  mrg 		  if (k == s)
    796   1.5  mrg 		    {
    797   1.5  mrg 		      ++p;
    798   1.5  mrg 		      if (p == (team->prev_ts.place_partition_off
    799   1.5  mrg 				+ team->prev_ts.place_partition_len))
    800   1.5  mrg 			p = team->prev_ts.place_partition_off;
    801   1.5  mrg 		      k = 1;
    802   1.5  mrg 		      if (i == nthreads - rest)
    803   1.5  mrg 			s = 1;
    804   1.5  mrg 		    }
    805   1.5  mrg 		  else
    806   1.5  mrg 		    ++k;
    807   1.5  mrg 		  start_data->ts.place_partition_off = p;
    808   1.5  mrg 		  start_data->ts.place_partition_len = 1;
    809   1.5  mrg 		}
    810   1.5  mrg 	      break;
    811   1.5  mrg 	    }
    812   1.5  mrg 	  start_data->place = p + 1;
    813   1.5  mrg 	  if (affinity_thr != NULL && pool->threads[i] != NULL)
    814   1.5  mrg 	    continue;
    815   1.5  mrg 	  gomp_init_thread_affinity (attr, p);
    816   1.5  mrg 	}
    817   1.5  mrg 
    818   1.1  mrg       start_data->fn = fn;
    819   1.1  mrg       start_data->fn_data = data;
    820   1.1  mrg       start_data->ts.team = team;
    821   1.1  mrg       start_data->ts.work_share = &team->work_shares[0];
    822   1.1  mrg       start_data->ts.last_work_share = NULL;
    823   1.1  mrg       start_data->ts.team_id = i;
    824   1.1  mrg       start_data->ts.level = team->prev_ts.level + 1;
    825   1.1  mrg       start_data->ts.active_level = thr->ts.active_level;
    826   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    827   1.1  mrg       start_data->ts.single_count = 0;
    828   1.1  mrg #endif
    829   1.1  mrg       start_data->ts.static_trip = 0;
    830   1.1  mrg       start_data->task = &team->implicit_task[i];
    831   1.1  mrg       gomp_init_task (start_data->task, task, icv);
    832   1.3  mrg       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    833   1.5  mrg       team->implicit_task[i].icv.bind_var = bind_var;
    834  1.10  mrg       start_data->task->taskgroup = taskgroup;
    835   1.1  mrg       start_data->thread_pool = pool;
    836   1.1  mrg       start_data->nested = nested;
    837   1.1  mrg 
    838   1.6  mrg       attr = gomp_adjust_thread_attr (attr, &thread_attr);
    839  1.10  mrg       err = pthread_create (&start_data->handle, attr, gomp_thread_start,
    840  1.10  mrg 			    start_data);
    841  1.10  mrg       start_data++;
    842   1.1  mrg       if (err != 0)
    843   1.1  mrg 	gomp_fatal ("Thread creation failed: %s", strerror (err));
    844   1.1  mrg     }
    845   1.1  mrg 
    846   1.6  mrg   if (__builtin_expect (attr == &thread_attr, 0))
    847   1.1  mrg     pthread_attr_destroy (&thread_attr);
    848   1.1  mrg 
    849   1.1  mrg  do_release:
    850   1.8  mrg   if (nested)
    851   1.8  mrg     gomp_barrier_wait (&team->barrier);
    852   1.8  mrg   else
    853   1.8  mrg     gomp_simple_barrier_wait (&pool->threads_dock);
    854   1.1  mrg 
    855   1.1  mrg   /* Decrease the barrier threshold to match the number of threads
    856   1.1  mrg      that should arrive back at the end of this team.  The extra
    857   1.1  mrg      threads should be exiting.  Note that we arrange for this test
    858   1.5  mrg      to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
    859   1.5  mrg      the barrier as well as gomp_managed_threads was temporarily
    860   1.5  mrg      set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
    861   1.5  mrg      AFFINITY_COUNT, if non-zero, will always be at least
    862   1.5  mrg      OLD_THREADS_COUNT - NTHREADS.  */
    863   1.5  mrg   if (__builtin_expect (nthreads < old_threads_used, 0)
    864   1.5  mrg       || __builtin_expect (affinity_count, 0))
    865   1.1  mrg     {
    866   1.1  mrg       long diff = (long) nthreads - (long) old_threads_used;
    867   1.1  mrg 
    868   1.5  mrg       if (affinity_count)
    869   1.5  mrg 	diff = -affinity_count;
    870   1.5  mrg 
    871   1.8  mrg       gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
    872   1.1  mrg 
    873   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    874   1.1  mrg       __sync_fetch_and_add (&gomp_managed_threads, diff);
    875   1.1  mrg #else
    876   1.5  mrg       gomp_mutex_lock (&gomp_managed_threads_lock);
    877   1.1  mrg       gomp_managed_threads += diff;
    878   1.5  mrg       gomp_mutex_unlock (&gomp_managed_threads_lock);
    879   1.1  mrg #endif
    880   1.1  mrg     }
    881  1.10  mrg   if (__builtin_expect (gomp_display_affinity_var, 0))
    882  1.10  mrg     {
    883  1.10  mrg       if (nested
    884  1.10  mrg 	  || nthreads != old_threads_used
    885  1.10  mrg 	  || force_display)
    886  1.10  mrg 	{
    887  1.10  mrg 	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
    888  1.10  mrg 					thr->place);
    889  1.10  mrg 	  if (nested)
    890  1.10  mrg 	    {
    891  1.10  mrg 	      start_data -= nthreads - 1;
    892  1.10  mrg 	      for (i = 1; i < nthreads; ++i)
    893  1.10  mrg 		{
    894  1.10  mrg 		  gomp_display_affinity_thread (
    895  1.10  mrg #ifdef LIBGOMP_USE_PTHREADS
    896  1.10  mrg 						start_data->handle,
    897  1.10  mrg #else
    898  1.10  mrg 						gomp_thread_self (),
    899  1.10  mrg #endif
    900  1.10  mrg 						&start_data->ts,
    901  1.10  mrg 						start_data->place);
    902  1.10  mrg 		  start_data++;
    903  1.10  mrg 		}
    904  1.10  mrg 	    }
    905  1.10  mrg 	  else
    906  1.10  mrg 	    {
    907  1.10  mrg 	      for (i = 1; i < nthreads; ++i)
    908  1.10  mrg 		{
    909  1.10  mrg 		  gomp_thread_handle handle
    910  1.10  mrg 		    = gomp_thread_to_pthread_t (pool->threads[i]);
    911  1.10  mrg 		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
    912  1.10  mrg 						pool->threads[i]->place);
    913  1.10  mrg 		}
    914  1.10  mrg 	    }
    915  1.10  mrg 	}
    916  1.10  mrg     }
    917   1.5  mrg   if (__builtin_expect (affinity_thr != NULL, 0)
    918   1.5  mrg       && team->prev_ts.place_partition_len > 64)
    919   1.5  mrg     free (affinity_thr);
    920   1.1  mrg }
    921   1.8  mrg #endif
    922   1.1  mrg 
    923   1.1  mrg 
    924   1.1  mrg /* Terminate the current team.  This is only to be called by the master
    925   1.1  mrg    thread.  We assume that we must wait for the other threads.  */
    926   1.1  mrg 
    927   1.1  mrg void
    928   1.1  mrg gomp_team_end (void)
    929   1.1  mrg {
    930   1.1  mrg   struct gomp_thread *thr = gomp_thread ();
    931   1.1  mrg   struct gomp_team *team = thr->ts.team;
    932   1.1  mrg 
    933   1.5  mrg   /* This barrier handles all pending explicit threads.
    934   1.5  mrg      As #pragma omp cancel parallel might get awaited count in
    935   1.5  mrg      team->barrier in an inconsistent state, we need to use a different
    936   1.5  mrg      counter here.  */
    937   1.5  mrg   gomp_team_barrier_wait_final (&team->barrier);
                     /* On cancellation the work-share chain may lack its normal
                        self-linked terminator; repair it while finalizing every
                        entry starting from work_shares_to_free.  */
    938   1.5  mrg   if (__builtin_expect (team->team_cancelled, 0))
    939   1.5  mrg     {
    940   1.5  mrg       struct gomp_work_share *ws = team->work_shares_to_free;
    941   1.5  mrg       do
    942   1.5  mrg 	{
    943   1.5  mrg 	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
    944   1.5  mrg 	  if (next_ws == NULL)
    945   1.5  mrg 	    gomp_ptrlock_set (&ws->next_ws, ws);
    946   1.5  mrg 	  gomp_fini_work_share (ws);
    947   1.5  mrg 	  ws = next_ws;
    948   1.5  mrg 	}
    949   1.5  mrg       while (ws != NULL);
    950   1.5  mrg     }
    951   1.5  mrg   else
    952   1.5  mrg     gomp_fini_work_share (thr->ts.work_share);
    953   1.1  mrg 
                     /* Pop back to the parent task and the saved team state.  */
    954   1.1  mrg   gomp_end_task ();
    955   1.1  mrg   thr->ts = team->prev_ts;
    956   1.1  mrg 
                     /* A non-zero level after restoring prev_ts means this was a
                        nested team: give back its worker-thread count.  */
    957  1.10  mrg   if (__builtin_expect (thr->ts.level != 0, 0))
    958   1.1  mrg     {
    959   1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    960   1.1  mrg       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
    961   1.1  mrg #else
    962   1.5  mrg       gomp_mutex_lock (&gomp_managed_threads_lock);
    963   1.1  mrg       gomp_managed_threads -= team->nthreads - 1L;
    964   1.5  mrg       gomp_mutex_unlock (&gomp_managed_threads_lock);
    965   1.1  mrg #endif
    966   1.1  mrg       /* This barrier has gomp_barrier_wait_last counterparts
    967   1.1  mrg 	 and ensures the team can be safely destroyed.  */
    968   1.1  mrg       gomp_barrier_wait (&team->barrier);
    969   1.1  mrg     }
    970   1.1  mrg 
                     /* Free work shares allocated beyond the team's inline array.  */
    971   1.1  mrg   if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    972   1.1  mrg     {
    973   1.1  mrg       struct gomp_work_share *ws = team->work_shares[0].next_alloc;
    974   1.1  mrg       do
    975   1.1  mrg 	{
    976   1.1  mrg 	  struct gomp_work_share *next_ws = ws->next_alloc;
    977   1.1  mrg 	  free (ws);
    978   1.1  mrg 	  ws = next_ws;
    979   1.1  mrg 	}
    980   1.1  mrg       while (ws != NULL);
    981   1.1  mrg     }
    982   1.1  mrg   gomp_sem_destroy (&team->master_release);
    983   1.1  mrg 
                     /* Nested or single-thread teams are destroyed immediately;
                        otherwise cache the team in the pool for reuse and let
                        the pool's threads go idle.  */
    984   1.1  mrg   if (__builtin_expect (thr->ts.team != NULL, 0)
    985   1.1  mrg       || __builtin_expect (team->nthreads == 1, 0))
    986   1.1  mrg     free_team (team);
    987   1.1  mrg   else
    988   1.1  mrg     {
    989   1.1  mrg       struct gomp_thread_pool *pool = thr->thread_pool;
    990   1.1  mrg       if (pool->last_team)
    991   1.1  mrg 	free_team (pool->last_team);
    992   1.1  mrg       pool->last_team = team;
    993   1.6  mrg       gomp_release_thread_pool (pool);
    994   1.1  mrg     }
    995   1.1  mrg }
    996   1.1  mrg 
    997   1.8  mrg #ifdef LIBGOMP_USE_PTHREADS
    998   1.1  mrg 
    999   1.1  mrg /* Constructors for this file.  */
   1000   1.1  mrg 
   1001   1.1  mrg static void __attribute__((constructor))
   1002   1.1  mrg initialize_team (void)
   1003   1.1  mrg {
                     /* Without compiler-supported TLS, fall back to a pthread key
                        to hold the initial thread's per-thread data.  */
   1004   1.5  mrg #if !defined HAVE_TLS && !defined USE_EMUTLS
   1005   1.1  mrg   static struct gomp_thread initial_thread_tls_data;
   1006   1.1  mrg 
   1007   1.1  mrg   pthread_key_create (&gomp_tls_key, NULL);
   1008   1.1  mrg   pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
   1009   1.1  mrg #endif
   1010   1.1  mrg 
                     /* gomp_free_thread runs as this key's destructor at thread
                        exit, for threads that set the key (see gomp_new_icv).  */
   1011   1.1  mrg   if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
   1012   1.1  mrg     gomp_fatal ("could not create thread pool destructor.");
   1013   1.1  mrg }
   1014   1.1  mrg 
   1015   1.1  mrg static void __attribute__((destructor))
   1016   1.1  mrg team_destructor (void)
   1017   1.1  mrg {
   1018   1.1  mrg   /* Without this, dlclose on libgomp could lead to subsequent
   1019   1.1  mrg      crashes: later thread exits would invoke the key's destructor,
                        whose code would no longer be mapped.  */
   1020   1.1  mrg   pthread_key_delete (gomp_thread_destructor);
   1021   1.1  mrg }
   1022  1.10  mrg 
   1023  1.10  mrg /* Similar to gomp_free_pool_helper, but don't detach itself,
   1024  1.10  mrg    gomp_pause_host will pthread_join those threads.  */
   1025  1.10  mrg 
   1026  1.10  mrg static void
   1027  1.10  mrg gomp_pause_pool_helper (void *thread_pool)
   1028  1.10  mrg {
   1029  1.10  mrg   struct gomp_thread *thr = gomp_thread ();
   1030  1.10  mrg   struct gomp_thread_pool *pool
   1031  1.10  mrg     = (struct gomp_thread_pool *) thread_pool;
                     /* Signal the second threads_dock wait in gomp_pause_host that
                        this thread is done with the dock.  */
   1032  1.10  mrg   gomp_simple_barrier_wait_last (&pool->threads_dock);
                     /* Tear down per-thread state; the thread stays joinable so
                        gomp_pause_host can pthread_join it.  */
   1033  1.10  mrg   gomp_sem_destroy (&thr->release);
   1034  1.10  mrg   thr->thread_pool = NULL;
   1035  1.10  mrg   thr->task = NULL;
   1036  1.10  mrg   pthread_exit (NULL);
   1037  1.10  mrg }
   1038  1.10  mrg 
   1039  1.10  mrg /* Free a thread pool and release its threads.  Return non-zero on
   1040  1.10  mrg    failure.  */
   1041  1.10  mrg 
   1042  1.10  mrg int
   1043  1.10  mrg gomp_pause_host (void)
   1044  1.10  mrg {
   1045  1.10  mrg   struct gomp_thread *thr = gomp_thread ();
   1046  1.10  mrg   struct gomp_thread_pool *pool = thr->thread_pool;
                     /* Refuse inside a parallel region; only the initial,
                        non-nested state may pause the host pool.  */
   1047  1.10  mrg   if (thr->ts.level)
   1048  1.10  mrg     return -1;
   1049  1.10  mrg   if (pool)
   1050  1.10  mrg     {
   1051  1.10  mrg       if (pool->threads_used > 0)
   1052  1.10  mrg 	{
   1053  1.10  mrg 	  int i;
   1054  1.10  mrg 	  pthread_t *thrs
   1055  1.10  mrg 	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
                     /* Point every docked worker at the pause helper and record
                        its pthread handle for the joins below.  */
   1056  1.10  mrg 	  for (i = 1; i < pool->threads_used; i++)
   1057  1.10  mrg 	    {
   1058  1.10  mrg 	      struct gomp_thread *nthr = pool->threads[i];
   1059  1.10  mrg 	      nthr->fn = gomp_pause_pool_helper;
   1060  1.10  mrg 	      nthr->data = pool;
   1061  1.10  mrg 	      thrs[i] = gomp_thread_to_pthread_t (nthr);
   1062  1.10  mrg 	    }
   1063  1.10  mrg 	  /* This barrier undocks threads docked on pool->threads_dock.  */
   1064  1.10  mrg 	  gomp_simple_barrier_wait (&pool->threads_dock);
   1065  1.10  mrg 	  /* And this waits till all threads have called gomp_barrier_wait_last
   1066  1.10  mrg 	     in gomp_pause_pool_helper.  */
   1067  1.10  mrg 	  gomp_simple_barrier_wait (&pool->threads_dock);
   1068  1.10  mrg 	  /* Now it is safe to destroy the barrier and free the pool.  */
   1069  1.10  mrg 	  gomp_simple_barrier_destroy (&pool->threads_dock);
   1070  1.10  mrg 
                     /* Account for the workers we are about to retire.  */
   1071  1.10  mrg #ifdef HAVE_SYNC_BUILTINS
   1072  1.10  mrg 	  __sync_fetch_and_add (&gomp_managed_threads,
   1073  1.10  mrg 				1L - pool->threads_used);
   1074  1.10  mrg #else
   1075  1.10  mrg 	  gomp_mutex_lock (&gomp_managed_threads_lock);
   1076  1.10  mrg 	  gomp_managed_threads -= pool->threads_used - 1L;
   1077  1.10  mrg 	  gomp_mutex_unlock (&gomp_managed_threads_lock);
   1078  1.10  mrg #endif
   1079  1.10  mrg 	  for (i = 1; i < pool->threads_used; i++)
   1080  1.10  mrg 	    pthread_join (thrs[i], NULL);
   1081  1.10  mrg 	}
   1082  1.10  mrg       if (pool->last_team)
   1083  1.10  mrg 	free_team (pool->last_team);
                     /* NOTE(review): pool storage is deliberately kept on nvptx —
                        confirm against pool.h / the nvptx pool allocator.  */
   1084  1.10  mrg #ifndef __nvptx__
   1085  1.11  mrg       team_free (pool->threads);
   1086  1.11  mrg       team_free (pool);
   1087  1.10  mrg #endif
   1088  1.10  mrg       thr->thread_pool = NULL;
   1089  1.10  mrg     }
   1090  1.10  mrg   return 0;
   1091  1.10  mrg }
   1092   1.8  mrg #endif
   1093   1.1  mrg 
   1094   1.1  mrg struct gomp_task_icv *
   1095   1.1  mrg gomp_new_icv (void)
   1096   1.1  mrg {
                     /* Give the current thread a fresh implicit task initialized
                        from the global ICVs and return a pointer to its ICV set.
                        Also registers the thread with gomp_thread_destructor so
                        gomp_free_thread reclaims its resources at thread exit.  */
   1097   1.1  mrg   struct gomp_thread *thr = gomp_thread ();
   1098   1.1  mrg   struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
   1099   1.1  mrg   gomp_init_task (task, NULL, &gomp_global_icv);
   1100   1.1  mrg   thr->task = task;
   1101   1.8  mrg #ifdef LIBGOMP_USE_PTHREADS
   1102   1.1  mrg   pthread_setspecific (gomp_thread_destructor, thr);
   1103   1.8  mrg #endif
   1104   1.1  mrg   return &task->icv;
   1105   1.1  mrg }
   1106