Home | History | Annotate | Line # | Download | only in libgomp
      1  1.1.1.12  mrg /* Copyright (C) 2005-2024 Free Software Foundation, Inc.
      2       1.1  mrg    Contributed by Richard Henderson <rth (at) redhat.com>.
      3       1.1  mrg 
      4   1.1.1.3  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5   1.1.1.3  mrg    (libgomp).
      6       1.1  mrg 
      7       1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8       1.1  mrg    under the terms of the GNU General Public License as published by
      9       1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10       1.1  mrg    any later version.
     11       1.1  mrg 
     12       1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13       1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14       1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15       1.1  mrg    more details.
     16       1.1  mrg 
     17       1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18       1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19       1.1  mrg    3.1, as published by the Free Software Foundation.
     20       1.1  mrg 
     21       1.1  mrg    You should have received a copy of the GNU General Public License and
     22       1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23       1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24       1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25       1.1  mrg 
     26       1.1  mrg /* This file handles the (bare) PARALLEL construct.  */
     27       1.1  mrg 
     28       1.1  mrg #include "libgomp.h"
     29       1.1  mrg #include <limits.h>
     30       1.1  mrg 
     31       1.1  mrg 
     32       1.1  mrg /* Determine the number of threads to be launched for a PARALLEL construct.
     33       1.1  mrg    This algorithm is explicitly described in OpenMP 3.0 section 2.4.1.
     34       1.1  mrg    SPECIFIED is a combination of the NUM_THREADS clause and the IF clause.
     35       1.1  mrg    If the IF clause is false, SPECIFIED is forced to 1.  When NUM_THREADS
     36       1.1  mrg    is not present, SPECIFIED is 0.  */
     37       1.1  mrg 
unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  unsigned threads_requested, max_num_threads, num_threads;
  unsigned long busy;
  struct gomp_thread_pool *pool;

  icv = gomp_icv (false);

  /* SPECIFIED == 1 means either num_threads(1) or an IF clause that
     evaluated to false (see the header comment): a team of one.  */
  if (specified == 1)
    return 1;

  if (thr->ts.active_level >= 1
  /* Accelerators with fixed thread counts require this to return 1 for
     nested parallel regions.  */
#if !defined(__AMDGCN__) && !defined(__nvptx__)
      && icv->max_active_levels_var <= 1
#endif
      )
    return 1;
  else if (thr->ts.active_level >= icv->max_active_levels_var)
    /* Already at the maximum nesting depth of active parallel regions.  */
    return 1;

  /* If NUM_THREADS not specified, use nthreads_var.  */
  if (specified == 0)
    threads_requested = icv->nthreads_var;
  else
    threads_requested = specified;

  max_num_threads = threads_requested;

  /* If dynamic threads are enabled, bound the number of threads
     that we launch.  */
  if (icv->dyn_var)
    {
      unsigned dyn = gomp_dynamic_max_threads ();
      if (dyn < max_num_threads)
	max_num_threads = dyn;

      /* Optimization for parallel sections: no point launching more
	 threads than there are sections (COUNT).  */
      if (count && count < max_num_threads)
	max_num_threads = count;
    }

  /* UINT_MAX stands for infinity.  With no thread-limit (the expected
     case) or a single thread, no busy-count bookkeeping is needed.  */
  if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;

  /* The threads_busy counter lives in thread_pool, if there
     isn't a thread_pool yet, there must be just one thread
     in the contention group.  If thr->team is NULL, this isn't
     nested parallel, so there is just one thread in the
     contention group as well, no need to handle it atomically.  */
  pool = thr->thread_pool;
  if (thr->ts.team == NULL || pool == NULL)
    {
      num_threads = max_num_threads;
      if (num_threads > icv->thread_limit_var)
	num_threads = icv->thread_limit_var;
      if (pool)
	pool->threads_busy = num_threads;
      return num_threads;
    }

#ifdef HAVE_SYNC_BUILTINS
  /* Atomically reserve NUM_THREADS - 1 extra slots in pool->threads_busy
     (the current thread is already counted as busy); retry the CAS if
     another thread changed the counter concurrently.  */
  do
    {
      busy = pool->threads_busy;
      num_threads = max_num_threads;
      /* thread_limit_var - busy + 1 is how many threads this region may
	 still use, the +1 re-counting the current, already-busy thread.  */
      if (icv->thread_limit_var - busy + 1 < num_threads)
	num_threads = icv->thread_limit_var - busy + 1;
    }
  while (__sync_val_compare_and_swap (&pool->threads_busy,
				      busy, busy + num_threads - 1)
	 != busy);
#else
  /* No atomic builtins: guard the busy counter with the global lock.  */
  gomp_mutex_lock (&gomp_managed_threads_lock);
  num_threads = max_num_threads;
  busy = pool->threads_busy;
  if (icv->thread_limit_var - busy + 1 < num_threads)
    num_threads = icv->thread_limit_var - busy + 1;
  pool->threads_busy += num_threads - 1;
  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif

  return num_threads;
}
    128       1.1  mrg 
    129       1.1  mrg void
    130       1.1  mrg GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
    131       1.1  mrg {
    132       1.1  mrg   num_threads = gomp_resolve_num_threads (num_threads, 0);
    133   1.1.1.9  mrg   gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
    134   1.1.1.9  mrg 		   NULL);
    135       1.1  mrg }
    136       1.1  mrg 
/* Tear down the current parallel region and, when a finite thread-limit
   is in force, give back the threads_busy slots that were reserved by
   gomp_resolve_num_threads.  */

void
GOMP_parallel_end (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  /* UINT_MAX means no thread-limit, so no bookkeeping to undo; that is
     the expected (likely) case.  */
  if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      /* Snapshot the team size before gomp_team_end frees the team.  */
      unsigned int nthreads = team ? team->nthreads : 1;
      gomp_team_end ();
      if (nthreads > 1)
	{
	  /* If not nested, there is just one thread in the
	     contention group left, no need for atomicity.  */
	  if (thr->ts.team == NULL)
	    thr->thread_pool->threads_busy = 1;
	  else
	    {
#ifdef HAVE_SYNC_BUILTINS
	      /* Release nthreads - 1 slots; 1UL - nthreads is the
		 negative delta expressed in unsigned long arithmetic.  */
	      __sync_fetch_and_add (&thr->thread_pool->threads_busy,
				    1UL - nthreads);
#else
	      gomp_mutex_lock (&gomp_managed_threads_lock);
	      thr->thread_pool->threads_busy -= nthreads - 1;
	      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	    }
	}
    }
  else
    gomp_team_end ();
}
ialias (GOMP_parallel_end)
    170   1.1.1.3  mrg 
    171   1.1.1.3  mrg void
    172   1.1.1.9  mrg GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
    173   1.1.1.9  mrg 	       unsigned int flags)
    174   1.1.1.3  mrg {
    175   1.1.1.3  mrg   num_threads = gomp_resolve_num_threads (num_threads, 0);
    176   1.1.1.9  mrg   gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
    177   1.1.1.9  mrg 		   NULL);
    178   1.1.1.3  mrg   fn (data);
    179   1.1.1.3  mrg   ialias_call (GOMP_parallel_end) ();
    180   1.1.1.3  mrg }
    181   1.1.1.3  mrg 
    182   1.1.1.9  mrg unsigned
    183   1.1.1.9  mrg GOMP_parallel_reductions (void (*fn) (void *), void *data,
    184   1.1.1.9  mrg 			  unsigned num_threads, unsigned int flags)
    185   1.1.1.9  mrg {
    186   1.1.1.9  mrg   struct gomp_taskgroup *taskgroup;
    187   1.1.1.9  mrg   num_threads = gomp_resolve_num_threads (num_threads, 0);
    188   1.1.1.9  mrg   uintptr_t *rdata = *(uintptr_t **)data;
    189   1.1.1.9  mrg   taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
    190   1.1.1.9  mrg   gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
    191   1.1.1.9  mrg 		   taskgroup);
    192   1.1.1.9  mrg   fn (data);
    193   1.1.1.9  mrg   ialias_call (GOMP_parallel_end) ();
    194   1.1.1.9  mrg   gomp_sem_destroy (&taskgroup->taskgroup_sem);
    195   1.1.1.9  mrg   free (taskgroup);
    196   1.1.1.9  mrg   return num_threads;
    197   1.1.1.9  mrg }
    198   1.1.1.9  mrg 
    199   1.1.1.3  mrg bool
    200   1.1.1.3  mrg GOMP_cancellation_point (int which)
    201   1.1.1.3  mrg {
    202   1.1.1.3  mrg   if (!gomp_cancel_var)
    203   1.1.1.3  mrg     return false;
    204       1.1  mrg 
    205   1.1.1.3  mrg   struct gomp_thread *thr = gomp_thread ();
    206   1.1.1.3  mrg   struct gomp_team *team = thr->ts.team;
    207   1.1.1.3  mrg   if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    208   1.1.1.3  mrg     {
    209   1.1.1.3  mrg       if (team == NULL)
    210   1.1.1.3  mrg 	return false;
    211   1.1.1.3  mrg       return team->work_share_cancelled != 0;
    212   1.1.1.3  mrg     }
    213   1.1.1.3  mrg   else if (which & GOMP_CANCEL_TASKGROUP)
    214   1.1.1.3  mrg     {
    215   1.1.1.9  mrg       if (thr->task->taskgroup)
    216   1.1.1.9  mrg 	{
    217   1.1.1.9  mrg 	  if (thr->task->taskgroup->cancelled)
    218   1.1.1.9  mrg 	    return true;
    219   1.1.1.9  mrg 	  if (thr->task->taskgroup->workshare
    220   1.1.1.9  mrg 	      && thr->task->taskgroup->prev
    221   1.1.1.9  mrg 	      && thr->task->taskgroup->prev->cancelled)
    222   1.1.1.9  mrg 	    return true;
    223   1.1.1.9  mrg 	}
    224   1.1.1.3  mrg       /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
    225   1.1.1.3  mrg 	 as #pragma omp cancel parallel also cancels all explicit
    226   1.1.1.3  mrg 	 tasks.  */
    227   1.1.1.3  mrg     }
    228   1.1.1.3  mrg   if (team)
    229   1.1.1.3  mrg     return gomp_team_barrier_cancelled (&team->barrier);
    230   1.1.1.3  mrg   return false;
    231   1.1.1.3  mrg }
    232   1.1.1.3  mrg ialias (GOMP_cancellation_point)
    233   1.1.1.3  mrg 
/* Implement #pragma omp cancel.  WHICH is a GOMP_CANCEL_* bitmask naming
   the binding construct; DO_CANCEL is the value of the cancel directive's
   IF clause (true when absent).  Returns true if cancellation took, or is
   already in, effect.  */

bool
GOMP_cancel (int which, bool do_cancel)
{
  /* Cancellation support must be enabled (OMP_CANCELLATION).  */
  if (!gomp_cancel_var)
    return false;

  /* A false IF clause demotes the cancel to a mere cancellation point.  */
  if (!do_cancel)
    return ialias_call (GOMP_cancellation_point) (which);

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      /* In orphaned worksharing region, all we want to cancel
	 is current thread.  */
      if (team != NULL)
	team->work_share_cancelled = 1;
      return true;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
	{
	  struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
	  /* A workshare taskgroup is an implementation detail of task
	     reductions; cancel its user-visible parent instead.  */
	  if (taskgroup->workshare && taskgroup->prev)
	    taskgroup = taskgroup->prev;
	  if (!taskgroup->cancelled)
	    {
	      /* NOTE(review): team is dereferenced here without a NULL
		 check; presumably an explicit taskgroup implies an
		 enclosing team — confirm against task.c.  */
	      gomp_mutex_lock (&team->task_lock);
	      taskgroup->cancelled = true;
	      gomp_mutex_unlock (&team->task_lock);
	    }
	}
      return true;
    }
  /* GOMP_CANCEL_PARALLEL: cancel the entire team through its barrier.
     NOTE(review): team is assumed non-NULL here — cancel parallel must
     bind to an enclosing parallel region; confirm callers guarantee it.  */
  team->team_cancelled = 1;
  gomp_team_barrier_cancel (team);
  return true;
}
    273       1.1  mrg 
    274       1.1  mrg /* The public OpenMP API for thread and team related inquiries.  */
    276       1.1  mrg 
    277       1.1  mrg int
    278       1.1  mrg omp_get_num_threads (void)
    279       1.1  mrg {
    280       1.1  mrg   struct gomp_team *team = gomp_thread ()->ts.team;
    281       1.1  mrg   return team ? team->nthreads : 1;
    282       1.1  mrg }
    283       1.1  mrg 
    284       1.1  mrg int
    285       1.1  mrg omp_get_thread_num (void)
    286       1.1  mrg {
    287       1.1  mrg   return gomp_thread ()->ts.team_id;
    288       1.1  mrg }
    289       1.1  mrg 
    290       1.1  mrg /* This wasn't right for OpenMP 2.5.  Active region used to be non-zero
    291       1.1  mrg    when the IF clause doesn't evaluate to false, starting with OpenMP 3.0
    292       1.1  mrg    it is non-zero with more than one thread in the team.  */
    293       1.1  mrg 
    294       1.1  mrg int
    295       1.1  mrg omp_in_parallel (void)
    296       1.1  mrg {
    297       1.1  mrg   return gomp_thread ()->ts.active_level > 0;
    298       1.1  mrg }
    299       1.1  mrg 
    300       1.1  mrg int
    301       1.1  mrg omp_get_level (void)
    302       1.1  mrg {
    303       1.1  mrg   return gomp_thread ()->ts.level;
    304       1.1  mrg }
    305       1.1  mrg 
    306       1.1  mrg int
    307       1.1  mrg omp_get_ancestor_thread_num (int level)
    308       1.1  mrg {
    309       1.1  mrg   struct gomp_team_state *ts = &gomp_thread ()->ts;
    310       1.1  mrg   if (level < 0 || level > ts->level)
    311       1.1  mrg     return -1;
    312       1.1  mrg   for (level = ts->level - level; level > 0; --level)
    313       1.1  mrg     ts = &ts->team->prev_ts;
    314       1.1  mrg   return ts->team_id;
    315       1.1  mrg }
    316       1.1  mrg 
    317       1.1  mrg int
    318       1.1  mrg omp_get_team_size (int level)
    319       1.1  mrg {
    320       1.1  mrg   struct gomp_team_state *ts = &gomp_thread ()->ts;
    321       1.1  mrg   if (level < 0 || level > ts->level)
    322       1.1  mrg     return -1;
    323       1.1  mrg   for (level = ts->level - level; level > 0; --level)
    324       1.1  mrg     ts = &ts->team->prev_ts;
    325       1.1  mrg   if (ts->team == NULL)
    326       1.1  mrg     return 1;
    327       1.1  mrg   else
    328       1.1  mrg     return ts->team->nthreads;
    329       1.1  mrg }
    330       1.1  mrg 
    331       1.1  mrg int
    332       1.1  mrg omp_get_active_level (void)
    333       1.1  mrg {
    334       1.1  mrg   return gomp_thread ()->ts.active_level;
    335       1.1  mrg }
    336       1.1  mrg 
/* Export internal aliases for these public entry points so that other
   libgomp translation units can call them directly (avoiding PLT
   indirection / interposition).  */
ialias (omp_get_num_threads)
ialias (omp_get_thread_num)
ialias (omp_in_parallel)
ialias (omp_get_level)
ialias (omp_get_ancestor_thread_num)
ialias (omp_get_team_size)
ialias (omp_get_active_level)
    344