/* team.c — libgomp AMD GCN port (imported revision 1.1.1.2).  */
      1 /* Copyright (C) 2017-2022 Free Software Foundation, Inc.
      2    Contributed by Mentor Embedded.
      3 
      4    This file is part of the GNU Offloading and Multi Processing Library
      5    (libgomp).
      6 
      7    Libgomp is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15    more details.
     16 
     17    Under Section 7 of GPL version 3, you are granted additional
     18    permissions described in the GCC Runtime Library Exception, version
     19    3.1, as published by the Free Software Foundation.
     20 
     21    You should have received a copy of the GNU General Public License and
     22    a copy of the GCC Runtime Library Exception along with this program;
     23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24    <http://www.gnu.org/licenses/>.  */
     25 
     26 /* This file handles maintenance of threads on AMD GCN.  */
     27 
     28 #include "libgomp.h"
     29 #include <stdlib.h>
     30 #include <string.h>
     31 
     32 static void gomp_thread_start (struct gomp_thread_pool *);
     33 
     34 /* This externally visible function handles target region entry.  It
     35    sets up a per-team thread pool and transfers control by returning to
     36    the kernel in the master thread or gomp_thread_start in other threads.
     37 
     38    The name of this function is part of the interface with the compiler: for
     39    each OpenMP kernel the compiler configures the stack, then calls here.
     40 
     41    Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue.  */
     42 
     43 void
     44 gomp_gcn_enter_kernel (void)
     45 {
     46   int threadid = __builtin_gcn_dim_pos (1);
     47 
     48   if (threadid == 0)
     49     {
     50       int numthreads = __builtin_gcn_dim_size (1);
     51       int teamid = __builtin_gcn_dim_pos(0);
     52 
     53       /* Set up the global state.
     54 	 Every team will do this, but that should be harmless.  */
     55       gomp_global_icv.nthreads_var = 16;
     56       gomp_global_icv.thread_limit_var = numthreads;
     57       /* Starting additional threads is not supported.  */
     58       gomp_global_icv.dyn_var = true;
     59 
     60       /* Initialize the team arena for optimized memory allocation.
     61          The arena has been allocated on the host side, and the address
     62          passed in via the kernargs.  Each team takes a small slice of it.  */
     63       register void **kernargs asm("s8");
     64       void *team_arena = (kernargs[4] + TEAM_ARENA_SIZE*teamid);
     65       void * __lds *arena_start = (void * __lds *)TEAM_ARENA_START;
     66       void * __lds *arena_free = (void * __lds *)TEAM_ARENA_FREE;
     67       void * __lds *arena_end = (void * __lds *)TEAM_ARENA_END;
     68       *arena_start = team_arena;
     69       *arena_free = team_arena;
     70       *arena_end = team_arena + TEAM_ARENA_SIZE;
     71 
     72       /* Allocate and initialize the team-local-storage data.  */
     73       struct gomp_thread *thrs = team_malloc_cleared (sizeof (*thrs)
     74 						      * numthreads);
     75       set_gcn_thrs (thrs);
     76 
     77       /* Allocate and initialize a pool of threads in the team.
     78          The threads are already running, of course, we just need to manage
     79          the communication between them.  */
     80       struct gomp_thread_pool *pool = team_malloc (sizeof (*pool));
     81       pool->threads = team_malloc (sizeof (void *) * numthreads);
     82       for (int tid = 0; tid < numthreads; tid++)
     83 	pool->threads[tid] = &thrs[tid];
     84       pool->threads_size = numthreads;
     85       pool->threads_used = numthreads;
     86       pool->threads_busy = 1;
     87       pool->last_team = NULL;
     88       gomp_simple_barrier_init (&pool->threads_dock, numthreads);
     89       thrs->thread_pool = pool;
     90 
     91       asm ("s_barrier" ::: "memory");
     92       return;  /* Return to kernel.  */
     93     }
     94   else
     95     {
     96       asm ("s_barrier" ::: "memory");
     97       gomp_thread_start (gcn_thrs ()[0].thread_pool);
     98       /* gomp_thread_start does not return.  */
     99     }
    100 }
    101 
/* Kernel epilogue: shut down the pool's worker threads, then release
   the team-local thread descriptors allocated at kernel entry.  */

void
gomp_gcn_exit_kernel (void)
{
  struct gomp_thread *thrs = gcn_thrs ();

  gomp_free_thread (thrs);
  team_free (thrs);
}
    108 
    109 /* This function contains the idle loop in which a thread waits
    110    to be called up to become part of a team.  */
    111 
    112 static void
    113 gomp_thread_start (struct gomp_thread_pool *pool)
    114 {
    115   struct gomp_thread *thr = gomp_thread ();
    116 
    117   gomp_sem_init (&thr->release, 0);
    118   thr->thread_pool = pool;
    119 
    120   /* The loop exits only when "fn" is assigned "gomp_free_pool_helper",
    121      which contains "s_endpgm", or an infinite no-op loop is
    122      suspected (this happens when the thread master crashes).  */
    123   int nul_limit = 99;
    124   do
    125     {
    126       gomp_simple_barrier_wait (&pool->threads_dock);
    127       if (!thr->fn)
    128 	{
    129 	  if (nul_limit-- > 0)
    130 	    continue;
    131 	  else
    132 	    {
    133 	      const char msg[] = ("team master not responding;"
    134 				  " slave thread aborting");
    135 	      write (2, msg, sizeof (msg)-1);
    136 	      abort();
    137 	    }
    138 	}
    139       thr->fn (thr->data);
    140       thr->fn = NULL;
    141 
    142       struct gomp_task *task = thr->task;
    143       gomp_team_barrier_wait_final (&thr->ts.team->barrier);
    144       gomp_finish_task (task);
    145     }
    146   while (1);
    147 }
    148 
    149 /* Launch a team.  */
    150 
    151 void
    152 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    153 		 unsigned flags, struct gomp_team *team,
    154 		 struct gomp_taskgroup *taskgroup)
    155 {
    156   struct gomp_thread *thr, *nthr;
    157   struct gomp_task *task;
    158   struct gomp_task_icv *icv;
    159   struct gomp_thread_pool *pool;
    160   unsigned long nthreads_var;
    161 
    162   thr = gomp_thread ();
    163   pool = thr->thread_pool;
    164   task = thr->task;
    165   icv = task ? &task->icv : &gomp_global_icv;
    166 
    167   /* Always save the previous state, even if this isn't a nested team.
    168      In particular, we should save any work share state from an outer
    169      orphaned work share construct.  */
    170   team->prev_ts = thr->ts;
    171 
    172   thr->ts.team = team;
    173   thr->ts.team_id = 0;
    174   ++thr->ts.level;
    175   if (nthreads > 1)
    176     ++thr->ts.active_level;
    177   thr->ts.work_share = &team->work_shares[0];
    178   thr->ts.last_work_share = NULL;
    179   thr->ts.single_count = 0;
    180   thr->ts.static_trip = 0;
    181   thr->task = &team->implicit_task[0];
    182   nthreads_var = icv->nthreads_var;
    183   gomp_init_task (thr->task, task, icv);
    184   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    185   team->implicit_task[0].taskgroup = taskgroup;
    186 
    187   if (nthreads == 1)
    188     return;
    189 
    190   /* Release existing idle threads.  */
    191   for (unsigned i = 1; i < nthreads; ++i)
    192     {
    193       nthr = pool->threads[i];
    194       nthr->ts.team = team;
    195       nthr->ts.work_share = &team->work_shares[0];
    196       nthr->ts.last_work_share = NULL;
    197       nthr->ts.team_id = i;
    198       nthr->ts.level = team->prev_ts.level + 1;
    199       nthr->ts.active_level = thr->ts.active_level;
    200       nthr->ts.single_count = 0;
    201       nthr->ts.static_trip = 0;
    202       nthr->task = &team->implicit_task[i];
    203       gomp_init_task (nthr->task, task, icv);
    204       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    205       team->implicit_task[i].taskgroup = taskgroup;
    206       nthr->fn = fn;
    207       nthr->data = data;
    208       team->ordered_release[i] = &nthr->release;
    209     }
    210 
    211   gomp_simple_barrier_wait (&pool->threads_dock);
    212 }
    213 
    214 #include "../../team.c"
    215