Home | History | Annotate | Line # | Download | only in libgomp
      1 /* Copyright (C) 2005-2022 Free Software Foundation, Inc.
      2    Contributed by Richard Henderson <rth (at) redhat.com>.
      3 
      4    This file is part of the GNU Offloading and Multi Processing Library
      5    (libgomp).
      6 
      7    Libgomp is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15    more details.
     16 
     17    Under Section 7 of GPL version 3, you are granted additional
     18    permissions described in the GCC Runtime Library Exception, version
     19    3.1, as published by the Free Software Foundation.
     20 
     21    You should have received a copy of the GNU General Public License and
     22    a copy of the GCC Runtime Library Exception along with this program;
     23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24    <http://www.gnu.org/licenses/>.  */
     25 
     26 /* This file contains routines to manage the work-share queue for a team
     27    of threads.  */
     28 
     29 #include "libgomp.h"
     30 #include <stddef.h>
     31 #include <stdlib.h>
     32 #include <string.h>
     33 
     34 
/* Allocate a new work share structure, preferably from current team's
   free gomp_work_share cache.  This is called in a critical section
   (only one thread per team allocates at a time), so the
   work_share_list_alloc list needs no locking; only
   work_share_list_free can be mutated concurrently by
   free_work_share.  */

static struct gomp_work_share *
alloc_work_share (struct gomp_team *team)
{
  struct gomp_work_share *ws;
  unsigned int i;

  /* This is called in a critical section.  */
  if (team->work_share_list_alloc != NULL)
    {
      /* Fast path: pop the head of the single-threaded allocation
	 free list.  */
      ws = team->work_share_list_alloc;
      team->work_share_list_alloc = ws->next_free;
      return ws;
    }

#ifdef HAVE_SYNC_BUILTINS
  ws = team->work_share_list_free;
  /* We need atomic read from work_share_list_free,
     as free_work_share can be called concurrently.  */
  __asm ("" : "+r" (ws));

  if (ws && ws->next_free)
    {
      /* Take everything after the head.  The head itself may still be
	 in the middle of being pushed by a concurrent free_work_share
	 CAS, so it is left in place; entries behind it are stable.  */
      struct gomp_work_share *next = ws->next_free;
      ws->next_free = NULL;
      team->work_share_list_alloc = next->next_free;
      return next;
    }
#else
  /* No atomic builtins: the free list is guarded by a mutex, so the
     whole list can be moved over to the allocation list at once.  */
  gomp_mutex_lock (&team->work_share_list_free_lock);
  ws = team->work_share_list_free;
  if (ws)
    {
      team->work_share_list_alloc = ws->next_free;
      team->work_share_list_free = NULL;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
      return ws;
    }
  gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif

  /* Both caches are empty: grab a fresh chunk, doubling the chunk
     size each time so the number of chunk allocations per team stays
     logarithmic in the number of work shares.  */
  team->work_share_chunk *= 2;
  /* Allocating gomp_work_share structures aligned is just an
     optimization, don't do it when using the fallback method.  */
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			   team->work_share_chunk
			   * sizeof (struct gomp_work_share));
#else
  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
#endif
  /* Chain the chunk off work_shares[0] so it can be reclaimed when the
     team dies, hand element 0 to the caller, and thread elements
     1 .. chunk-1 onto the allocation free list.  */
  ws->next_alloc = team->work_shares[0].next_alloc;
  team->work_shares[0].next_alloc = ws;
  team->work_share_list_alloc = &ws[1];
  for (i = 1; i < team->work_share_chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}
     96 
/* Initialize an already allocated struct gomp_work_share.
   This shouldn't touch the next_alloc field.

   ORDERED describes the construct's ordered clause: 0 means none,
   1 means a plain ordered clause (one team-id slot per thread), and
   values > 1 additionally request ORDERED - 1 bytes of extra storage
   after the (long long aligned) team-id array -- NOTE(review): the
   > 1 meaning is inferred from the size computation below; confirm
   against callers.  NTHREADS is the team size.  */

void
gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
		      unsigned nthreads)
{
  gomp_mutex_init (&ws->lock);
  if (__builtin_expect (ordered, 0))
    {
/* Bytes available for ordered_team_ids inline inside the structure
   itself, i.e. from inline_ordered_team_ids to the end of WS.  */
#define INLINE_ORDERED_TEAM_IDS_SIZE \
  (sizeof (struct gomp_work_share) \
   - offsetof (struct gomp_work_share, inline_ordered_team_ids))

      if (__builtin_expect (ordered != 1, 0))
	{
	  /* Size of the team-id array, rounded up to long long
	     alignment whenever the inline buffer's placement
	     guarantees the rounding actually yields an aligned
	     offset.  */
	  size_t o = nthreads * sizeof (*ws->ordered_team_ids);
	  o += __alignof__ (long long) - 1;
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0
	      && __alignof__ (struct gomp_work_share)
		 >= __alignof__ (long long))
	    o &= ~(__alignof__ (long long) - 1);
	  /* Total: ORDERED - 1 extra bytes plus the id array.  */
	  ordered += o - 1;
	}
      else
	/* Plain ordered clause: just the per-thread id slots.  */
	ordered = nthreads * sizeof (*ws->ordered_team_ids);
      /* Prefer the inline buffer; spill to a heap allocation (freed
	 later by gomp_fini_work_share) only when too small.  */
      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
	ws->ordered_team_ids = team_malloc (ordered);
      else
	ws->ordered_team_ids = ws->inline_ordered_team_ids;
      memset (ws->ordered_team_ids, '\0', ordered);
      ws->ordered_num_used = 0;
      ws->ordered_owner = -1;	/* -1: no thread owns the ordered region.  */
      ws->ordered_cur = 0;
    }
  else
    ws->ordered_team_ids = ws->inline_ordered_team_ids;
  gomp_ptrlock_init (&ws->next_ws, NULL);
  ws->threads_completed = 0;
}
    138 
/* Do any needed destruction of gomp_work_share fields before it
   is put back into free gomp_work_share cache or freed.  The
   next_alloc chain is deliberately left intact; the structure's own
   memory is released (or recycled) by the caller.  */

void
gomp_fini_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  /* ordered_team_ids either points at the inline buffer inside WS or
     at a heap block from gomp_init_work_share; only the latter needs
     freeing.  */
  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    team_free (ws->ordered_team_ids);
  gomp_ptrlock_destroy (&ws->next_ws);
}
    150 
/* Free a work share struct, if not orphaned, put it into current
   team's free gomp_work_share cache.  TEAM is NULL for orphaned
   constructs, where WS was individually heap-allocated by
   gomp_work_share_start and is simply freed.  */

static inline void
free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
{
  gomp_fini_work_share (ws);
  if (__builtin_expect (team == NULL, 0))
    free (ws);
  else
    {
      struct gomp_work_share *next_ws;
#ifdef HAVE_SYNC_BUILTINS
      /* Lock-free push of WS onto the head of the team's free list.
	 alloc_work_share may read the list concurrently, which is why
	 it only ever steals entries behind the head.  */
      do
	{
	  next_ws = team->work_share_list_free;
	  ws->next_free = next_ws;
	}
      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
					    next_ws, ws));
#else
      /* No atomics: the same push, guarded by the free-list mutex.  */
      gomp_mutex_lock (&team->work_share_list_free_lock);
      next_ws = team->work_share_list_free;
      ws->next_free = next_ws;
      team->work_share_list_free = ws;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif
    }
}
    180 
    181 /* The current thread is ready to begin the next work sharing construct.
    182    In all cases, thr->ts.work_share is updated to point to the new
    183    structure.  In all cases the work_share lock is locked.  Return true
    184    if this was the first thread to reach this point.  */
    185 
    186 bool
    187 gomp_work_share_start (size_t ordered)
    188 {
    189   struct gomp_thread *thr = gomp_thread ();
    190   struct gomp_team *team = thr->ts.team;
    191   struct gomp_work_share *ws;
    192 
    193   /* Work sharing constructs can be orphaned.  */
    194   if (team == NULL)
    195     {
    196 #ifdef GOMP_USE_ALIGNED_WORK_SHARES
    197       ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
    198 			       sizeof (*ws));
    199 #else
    200       ws = gomp_malloc (sizeof (*ws));
    201 #endif
    202       gomp_init_work_share (ws, ordered, 1);
    203       thr->ts.work_share = ws;
    204       return true;
    205     }
    206 
    207   ws = thr->ts.work_share;
    208   thr->ts.last_work_share = ws;
    209   ws = gomp_ptrlock_get (&ws->next_ws);
    210   if (ws == NULL)
    211     {
    212       /* This thread encountered a new ws first.  */
    213       struct gomp_work_share *ws = alloc_work_share (team);
    214       gomp_init_work_share (ws, ordered, team->nthreads);
    215       thr->ts.work_share = ws;
    216       return true;
    217     }
    218   else
    219     {
    220       thr->ts.work_share = ws;
    221       return false;
    222     }
    223 }
    224 
/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      /* Orphaned: the work share was individually allocated, free it
	 outright (free_work_share with NULL team calls free).  */
      free_work_share (NULL, thr->ts.work_share);
      thr->ts.work_share = NULL;
      return;
    }

  bstate = gomp_barrier_wait_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      /* Only the last thread to reach the barrier recycles the
	 previous work share; every thread has moved past it by now.  */
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }

  gomp_team_barrier_wait_end (&team->barrier, bstate);
  thr->ts.last_work_share = NULL;
}
    257 
/* The current thread is done with its current work sharing construct.
   This version implies a cancellable barrier at the end of the
   work-share.  Returns the result of the cancellable barrier wait
   (true when the construct was cancelled -- NOTE(review): inferred
   from the _cancel naming; confirm against barrier implementation).  */

bool
gomp_work_share_end_cancel (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Cancellable work sharing constructs cannot be orphaned.  */
  bstate = gomp_barrier_wait_cancel_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      /* Same recycling protocol as gomp_work_share_end: the last
	 arriving thread frees the previous work share.  */
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }
  thr->ts.last_work_share = NULL;

  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
}
    283 
/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share;
   instead a per-work-share completion counter decides which thread
   recycles the previous structure.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, ws);
      thr->ts.work_share = NULL;
      return;
    }

  /* No previous work share recorded for this thread: nothing to
     release here.  */
  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    return;

#ifdef HAVE_SYNC_BUILTINS
  /* Atomically count this thread as done with WS.  */
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

  /* Exactly one thread observes the counter reach nthreads; that
     thread recycles the previous work share.  */
  if (completed == team->nthreads)
    {
      team->work_shares_to_free = thr->ts.work_share;
      free_work_share (team, thr->ts.last_work_share);
    }
  thr->ts.last_work_share = NULL;
}
    321