Home | History | Annotate | Line # | Download | only in libgomp
      1  1.1.1.12  mrg /* Copyright (C) 2005-2024 Free Software Foundation, Inc.
      2       1.1  mrg    Contributed by Richard Henderson <rth (at) redhat.com>.
      3       1.1  mrg 
      4   1.1.1.3  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5   1.1.1.3  mrg    (libgomp).
      6       1.1  mrg 
      7       1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8       1.1  mrg    under the terms of the GNU General Public License as published by
      9       1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10       1.1  mrg    any later version.
     11       1.1  mrg 
     12       1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13       1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14       1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15       1.1  mrg    more details.
     16       1.1  mrg 
     17       1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18       1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19       1.1  mrg    3.1, as published by the Free Software Foundation.
     20       1.1  mrg 
     21       1.1  mrg    You should have received a copy of the GNU General Public License and
     22       1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23       1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24       1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25       1.1  mrg 
     26       1.1  mrg /* This file contains routines to manage the work-share queue for a team
     27       1.1  mrg    of threads.  */
     28       1.1  mrg 
     29       1.1  mrg #include "libgomp.h"
     30       1.1  mrg #include <stddef.h>
     31       1.1  mrg #include <stdlib.h>
     32       1.1  mrg #include <string.h>
     33       1.1  mrg 
     34       1.1  mrg 
     35       1.1  mrg /* Allocate a new work share structure, preferably from current team's
     36       1.1  mrg    free gomp_work_share cache.  */
     37       1.1  mrg 
     38       1.1  mrg static struct gomp_work_share *
     39       1.1  mrg alloc_work_share (struct gomp_team *team)
     40       1.1  mrg {
     41       1.1  mrg   struct gomp_work_share *ws;
     42       1.1  mrg   unsigned int i;
     43       1.1  mrg 
     44       1.1  mrg   /* This is called in a critical section.  */
     45       1.1  mrg   if (team->work_share_list_alloc != NULL)
     46       1.1  mrg     {
     47       1.1  mrg       ws = team->work_share_list_alloc;
     48       1.1  mrg       team->work_share_list_alloc = ws->next_free;
     49       1.1  mrg       return ws;
     50       1.1  mrg     }
     51       1.1  mrg 
     52       1.1  mrg #ifdef HAVE_SYNC_BUILTINS
     53       1.1  mrg   ws = team->work_share_list_free;
     54       1.1  mrg   /* We need atomic read from work_share_list_free,
     55       1.1  mrg      as free_work_share can be called concurrently.  */
     56       1.1  mrg   __asm ("" : "+r" (ws));
     57       1.1  mrg 
     58       1.1  mrg   if (ws && ws->next_free)
     59       1.1  mrg     {
     60       1.1  mrg       struct gomp_work_share *next = ws->next_free;
     61       1.1  mrg       ws->next_free = NULL;
     62       1.1  mrg       team->work_share_list_alloc = next->next_free;
     63       1.1  mrg       return next;
     64       1.1  mrg     }
     65       1.1  mrg #else
     66       1.1  mrg   gomp_mutex_lock (&team->work_share_list_free_lock);
     67       1.1  mrg   ws = team->work_share_list_free;
     68       1.1  mrg   if (ws)
     69       1.1  mrg     {
     70       1.1  mrg       team->work_share_list_alloc = ws->next_free;
     71       1.1  mrg       team->work_share_list_free = NULL;
     72       1.1  mrg       gomp_mutex_unlock (&team->work_share_list_free_lock);
     73       1.1  mrg       return ws;
     74       1.1  mrg     }
     75       1.1  mrg   gomp_mutex_unlock (&team->work_share_list_free_lock);
     76       1.1  mrg #endif
     77       1.1  mrg 
     78       1.1  mrg   team->work_share_chunk *= 2;
     79   1.1.1.9  mrg   /* Allocating gomp_work_share structures aligned is just an
     80   1.1.1.9  mrg      optimization, don't do it when using the fallback method.  */
     81  1.1.1.11  mrg #ifdef GOMP_USE_ALIGNED_WORK_SHARES
     82   1.1.1.9  mrg   ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
     83   1.1.1.9  mrg 			   team->work_share_chunk
     84   1.1.1.9  mrg 			   * sizeof (struct gomp_work_share));
     85   1.1.1.9  mrg #else
     86       1.1  mrg   ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
     87   1.1.1.9  mrg #endif
     88       1.1  mrg   ws->next_alloc = team->work_shares[0].next_alloc;
     89       1.1  mrg   team->work_shares[0].next_alloc = ws;
     90       1.1  mrg   team->work_share_list_alloc = &ws[1];
     91       1.1  mrg   for (i = 1; i < team->work_share_chunk - 1; i++)
     92       1.1  mrg     ws[i].next_free = &ws[i + 1];
     93       1.1  mrg   ws[i].next_free = NULL;
     94       1.1  mrg   return ws;
     95       1.1  mrg }
     96       1.1  mrg 
     97       1.1  mrg /* Initialize an already allocated struct gomp_work_share.
     98       1.1  mrg    This shouldn't touch the next_alloc field.  */
     99       1.1  mrg 
    100       1.1  mrg void
    101   1.1.1.9  mrg gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
    102       1.1  mrg 		      unsigned nthreads)
    103       1.1  mrg {
    104       1.1  mrg   gomp_mutex_init (&ws->lock);
    105       1.1  mrg   if (__builtin_expect (ordered, 0))
    106       1.1  mrg     {
    107   1.1.1.9  mrg #define INLINE_ORDERED_TEAM_IDS_SIZE \
    108   1.1.1.9  mrg   (sizeof (struct gomp_work_share) \
    109   1.1.1.9  mrg    - offsetof (struct gomp_work_share, inline_ordered_team_ids))
    110   1.1.1.9  mrg 
    111   1.1.1.9  mrg       if (__builtin_expect (ordered != 1, 0))
    112   1.1.1.9  mrg 	{
    113   1.1.1.9  mrg 	  size_t o = nthreads * sizeof (*ws->ordered_team_ids);
    114   1.1.1.9  mrg 	  o += __alignof__ (long long) - 1;
    115   1.1.1.9  mrg 	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
    116  1.1.1.11  mrg 	       & (__alignof__ (long long) - 1)) == 0
    117  1.1.1.11  mrg 	      && __alignof__ (struct gomp_work_share)
    118  1.1.1.11  mrg 		 >= __alignof__ (long long))
    119   1.1.1.9  mrg 	    o &= ~(__alignof__ (long long) - 1);
    120   1.1.1.9  mrg 	  ordered += o - 1;
    121   1.1.1.9  mrg 	}
    122   1.1.1.9  mrg       else
    123   1.1.1.9  mrg 	ordered = nthreads * sizeof (*ws->ordered_team_ids);
    124   1.1.1.9  mrg       if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
    125  1.1.1.10  mrg 	ws->ordered_team_ids = team_malloc (ordered);
    126       1.1  mrg       else
    127       1.1  mrg 	ws->ordered_team_ids = ws->inline_ordered_team_ids;
    128   1.1.1.9  mrg       memset (ws->ordered_team_ids, '\0', ordered);
    129       1.1  mrg       ws->ordered_num_used = 0;
    130       1.1  mrg       ws->ordered_owner = -1;
    131       1.1  mrg       ws->ordered_cur = 0;
    132       1.1  mrg     }
    133       1.1  mrg   else
    134   1.1.1.9  mrg     ws->ordered_team_ids = ws->inline_ordered_team_ids;
    135       1.1  mrg   gomp_ptrlock_init (&ws->next_ws, NULL);
    136       1.1  mrg   ws->threads_completed = 0;
    137       1.1  mrg }
    138       1.1  mrg 
    139       1.1  mrg /* Do any needed destruction of gomp_work_share fields before it
    140       1.1  mrg    is put back into free gomp_work_share cache or freed.  */
    141       1.1  mrg 
    142       1.1  mrg void
    143       1.1  mrg gomp_fini_work_share (struct gomp_work_share *ws)
    144       1.1  mrg {
    145       1.1  mrg   gomp_mutex_destroy (&ws->lock);
    146       1.1  mrg   if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    147  1.1.1.10  mrg     team_free (ws->ordered_team_ids);
    148       1.1  mrg   gomp_ptrlock_destroy (&ws->next_ws);
    149       1.1  mrg }
    150       1.1  mrg 
    151       1.1  mrg /* Free a work share struct, if not orphaned, put it into current
    152       1.1  mrg    team's free gomp_work_share cache.  */
    153       1.1  mrg 
    154       1.1  mrg static inline void
    155       1.1  mrg free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
    156       1.1  mrg {
    157       1.1  mrg   gomp_fini_work_share (ws);
    158       1.1  mrg   if (__builtin_expect (team == NULL, 0))
    159       1.1  mrg     free (ws);
    160       1.1  mrg   else
    161       1.1  mrg     {
    162       1.1  mrg       struct gomp_work_share *next_ws;
    163       1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    164       1.1  mrg       do
    165       1.1  mrg 	{
    166       1.1  mrg 	  next_ws = team->work_share_list_free;
    167       1.1  mrg 	  ws->next_free = next_ws;
    168       1.1  mrg 	}
    169       1.1  mrg       while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
    170       1.1  mrg 					    next_ws, ws));
    171       1.1  mrg #else
    172       1.1  mrg       gomp_mutex_lock (&team->work_share_list_free_lock);
    173       1.1  mrg       next_ws = team->work_share_list_free;
    174       1.1  mrg       ws->next_free = next_ws;
    175       1.1  mrg       team->work_share_list_free = ws;
    176       1.1  mrg       gomp_mutex_unlock (&team->work_share_list_free_lock);
    177       1.1  mrg #endif
    178       1.1  mrg     }
    179       1.1  mrg }
    180       1.1  mrg 
    181       1.1  mrg /* The current thread is ready to begin the next work sharing construct.
    182       1.1  mrg    In all cases, thr->ts.work_share is updated to point to the new
    183       1.1  mrg    structure.  In all cases the work_share lock is locked.  Return true
    184       1.1  mrg    if this was the first thread to reach this point.  */
    185       1.1  mrg 
    186       1.1  mrg bool
    187   1.1.1.9  mrg gomp_work_share_start (size_t ordered)
    188       1.1  mrg {
    189       1.1  mrg   struct gomp_thread *thr = gomp_thread ();
    190       1.1  mrg   struct gomp_team *team = thr->ts.team;
    191       1.1  mrg   struct gomp_work_share *ws;
    192       1.1  mrg 
    193       1.1  mrg   /* Work sharing constructs can be orphaned.  */
    194       1.1  mrg   if (team == NULL)
    195       1.1  mrg     {
    196  1.1.1.11  mrg #ifdef GOMP_USE_ALIGNED_WORK_SHARES
    197  1.1.1.11  mrg       ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
    198  1.1.1.11  mrg 			       sizeof (*ws));
    199  1.1.1.11  mrg #else
    200       1.1  mrg       ws = gomp_malloc (sizeof (*ws));
    201  1.1.1.11  mrg #endif
    202       1.1  mrg       gomp_init_work_share (ws, ordered, 1);
    203       1.1  mrg       thr->ts.work_share = ws;
    204   1.1.1.9  mrg       return true;
    205       1.1  mrg     }
    206       1.1  mrg 
    207       1.1  mrg   ws = thr->ts.work_share;
    208       1.1  mrg   thr->ts.last_work_share = ws;
    209       1.1  mrg   ws = gomp_ptrlock_get (&ws->next_ws);
    210       1.1  mrg   if (ws == NULL)
    211       1.1  mrg     {
    212       1.1  mrg       /* This thread encountered a new ws first.  */
    213       1.1  mrg       struct gomp_work_share *ws = alloc_work_share (team);
    214       1.1  mrg       gomp_init_work_share (ws, ordered, team->nthreads);
    215       1.1  mrg       thr->ts.work_share = ws;
    216       1.1  mrg       return true;
    217       1.1  mrg     }
    218       1.1  mrg   else
    219       1.1  mrg     {
    220       1.1  mrg       thr->ts.work_share = ws;
    221       1.1  mrg       return false;
    222       1.1  mrg     }
    223       1.1  mrg }
    224       1.1  mrg 
    225       1.1  mrg /* The current thread is done with its current work sharing construct.
    226       1.1  mrg    This version does imply a barrier at the end of the work-share.  */
    227       1.1  mrg 
    228       1.1  mrg void
    229       1.1  mrg gomp_work_share_end (void)
    230       1.1  mrg {
    231       1.1  mrg   struct gomp_thread *thr = gomp_thread ();
    232       1.1  mrg   struct gomp_team *team = thr->ts.team;
    233       1.1  mrg   gomp_barrier_state_t bstate;
    234       1.1  mrg 
    235       1.1  mrg   /* Work sharing constructs can be orphaned.  */
    236       1.1  mrg   if (team == NULL)
    237       1.1  mrg     {
    238       1.1  mrg       free_work_share (NULL, thr->ts.work_share);
    239       1.1  mrg       thr->ts.work_share = NULL;
    240       1.1  mrg       return;
    241       1.1  mrg     }
    242       1.1  mrg 
    243       1.1  mrg   bstate = gomp_barrier_wait_start (&team->barrier);
    244       1.1  mrg 
    245       1.1  mrg   if (gomp_barrier_last_thread (bstate))
    246       1.1  mrg     {
    247       1.1  mrg       if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
    248   1.1.1.3  mrg 	{
    249   1.1.1.3  mrg 	  team->work_shares_to_free = thr->ts.work_share;
    250   1.1.1.3  mrg 	  free_work_share (team, thr->ts.last_work_share);
    251   1.1.1.3  mrg 	}
    252       1.1  mrg     }
    253       1.1  mrg 
    254       1.1  mrg   gomp_team_barrier_wait_end (&team->barrier, bstate);
    255       1.1  mrg   thr->ts.last_work_share = NULL;
    256       1.1  mrg }
    257       1.1  mrg 
    258       1.1  mrg /* The current thread is done with its current work sharing construct.
    259   1.1.1.3  mrg    This version implies a cancellable barrier at the end of the work-share.  */
    260   1.1.1.3  mrg 
    261   1.1.1.3  mrg bool
    262   1.1.1.3  mrg gomp_work_share_end_cancel (void)
    263   1.1.1.3  mrg {
    264   1.1.1.3  mrg   struct gomp_thread *thr = gomp_thread ();
    265   1.1.1.3  mrg   struct gomp_team *team = thr->ts.team;
    266   1.1.1.3  mrg   gomp_barrier_state_t bstate;
    267   1.1.1.3  mrg 
    268   1.1.1.3  mrg   /* Cancellable work sharing constructs cannot be orphaned.  */
    269   1.1.1.3  mrg   bstate = gomp_barrier_wait_cancel_start (&team->barrier);
    270   1.1.1.3  mrg 
    271   1.1.1.3  mrg   if (gomp_barrier_last_thread (bstate))
    272   1.1.1.3  mrg     {
    273   1.1.1.3  mrg       if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
    274   1.1.1.3  mrg 	{
    275   1.1.1.3  mrg 	  team->work_shares_to_free = thr->ts.work_share;
    276   1.1.1.3  mrg 	  free_work_share (team, thr->ts.last_work_share);
    277   1.1.1.3  mrg 	}
    278   1.1.1.3  mrg     }
    279   1.1.1.3  mrg   thr->ts.last_work_share = NULL;
    280   1.1.1.3  mrg 
    281   1.1.1.3  mrg   return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
    282   1.1.1.3  mrg }
    283   1.1.1.3  mrg 
    284   1.1.1.3  mrg /* The current thread is done with its current work sharing construct.
    285       1.1  mrg    This version does NOT imply a barrier at the end of the work-share.  */
    286       1.1  mrg 
    287       1.1  mrg void
    288       1.1  mrg gomp_work_share_end_nowait (void)
    289       1.1  mrg {
    290       1.1  mrg   struct gomp_thread *thr = gomp_thread ();
    291       1.1  mrg   struct gomp_team *team = thr->ts.team;
    292       1.1  mrg   struct gomp_work_share *ws = thr->ts.work_share;
    293       1.1  mrg   unsigned completed;
    294       1.1  mrg 
    295       1.1  mrg   /* Work sharing constructs can be orphaned.  */
    296       1.1  mrg   if (team == NULL)
    297       1.1  mrg     {
    298       1.1  mrg       free_work_share (NULL, ws);
    299       1.1  mrg       thr->ts.work_share = NULL;
    300       1.1  mrg       return;
    301       1.1  mrg     }
    302       1.1  mrg 
    303       1.1  mrg   if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    304       1.1  mrg     return;
    305       1.1  mrg 
    306       1.1  mrg #ifdef HAVE_SYNC_BUILTINS
    307       1.1  mrg   completed = __sync_add_and_fetch (&ws->threads_completed, 1);
    308       1.1  mrg #else
    309       1.1  mrg   gomp_mutex_lock (&ws->lock);
    310       1.1  mrg   completed = ++ws->threads_completed;
    311       1.1  mrg   gomp_mutex_unlock (&ws->lock);
    312       1.1  mrg #endif
    313       1.1  mrg 
    314       1.1  mrg   if (completed == team->nthreads)
    315   1.1.1.3  mrg     {
    316   1.1.1.3  mrg       team->work_shares_to_free = thr->ts.work_share;
    317   1.1.1.3  mrg       free_work_share (team, thr->ts.last_work_share);
    318   1.1.1.3  mrg     }
    319       1.1  mrg   thr->ts.last_work_share = NULL;
    320       1.1  mrg }
    321