Home | History | Annotate | Line # | Download | only in nvptx
team.c revision 1.1.1.6
      1 /* Copyright (C) 2015-2022 Free Software Foundation, Inc.
      2    Contributed by Alexander Monakov <amonakov (at) ispras.ru>
      3 
      4    This file is part of the GNU Offloading and Multi Processing Library
      5    (libgomp).
      6 
      7    Libgomp is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3, or (at your option)
     10    any later version.
     11 
     12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15    more details.
     16 
     17    Under Section 7 of GPL version 3, you are granted additional
     18    permissions described in the GCC Runtime Library Exception, version
     19    3.1, as published by the Free Software Foundation.
     20 
     21    You should have received a copy of the GNU General Public License and
     22    a copy of the GCC Runtime Library Exception along with this program;
     23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24    <http://www.gnu.org/licenses/>.  */
     25 
     26 /* This file handles maintenance of threads on NVPTX.  */
     27 
     28 #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
     29 
     30 #include "libgomp.h"
     31 #include <stdlib.h>
     32 #include <string.h>
     33 
/* Array of per-thread state for the current team.  The "shared" attribute
   places the pointer itself in PTX shared memory (one copy per thread
   block), so the storage the master thread allocates in gomp_nvptx_main is
   reachable by every thread in the team.  */
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
/* Team number of this thread block; initialized to 0 in gomp_nvptx_main.
   Also kept in per-block shared memory.  */
int __gomp_team_num __attribute__((shared,nocommon));
     36 
     37 static void gomp_thread_start (struct gomp_thread_pool *);
     38 
     39 
/* This externally visible function handles target region entry.  It
   sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
   in the master thread or gomp_thread_start in other threads.

   The name of this function is part of the interface with the compiler: for
   each target region, GCC emits a PTX .kernel function that sets up soft-stack
   and uniform-simt state and calls this function, passing in FN the original
   function outlined for the target region.  */

void
gomp_nvptx_main (void (*fn) (void *), void *fn_data)
{
  int tid, ntids;
  /* Read this thread's index and the team size from the PTX special
     registers; the y dimension indexes OpenMP threads on this target.  */
  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
  asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
  if (tid == 0)
    {
      /* Master thread: publish the ICVs and build the per-team thread
	 pool before releasing the other threads.  */
      gomp_global_icv.nthreads_var = ntids;
      gomp_global_icv.thread_limit_var = ntids;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      __gomp_team_num = 0;
      /* Allocate per-thread structures on the master's (soft) stack and
	 store the pointer in the shared variable nvptx_thrs so the other
	 threads can reach them.  */
      nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
      memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));

      struct gomp_thread_pool *pool = alloca (sizeof (*pool));
      pool->threads = alloca (ntids * sizeof (*pool->threads));
      for (tid = 0; tid < ntids; tid++)
	pool->threads[tid] = nvptx_thrs + tid;
      pool->threads_size = ntids;
      pool->threads_used = ntids;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, ntids);

      nvptx_thrs[0].thread_pool = pool;
      /* Matches the bar.sync in the worker branch below: the pool is fully
	 initialized at this point, so the other threads may now read it.  */
      asm ("bar.sync 0;");
      fn (fn_data);

      gomp_free_thread (nvptx_thrs);
    }
  else
    {
      /* Worker thread: wait until the master has finished setting up the
	 pool, then enter the idle loop.  */
      asm ("bar.sync 0;");
      gomp_thread_start (nvptx_thrs[0].thread_pool);
    }
}
     88 
/* This function contains the idle loop in which a thread waits
   to be called up to become part of a team.  */

static void
gomp_thread_start (struct gomp_thread_pool *pool)
{
  struct gomp_thread *thr = gomp_thread ();

  gomp_sem_init (&thr->release, 0);
  thr->thread_pool = pool;

  do
    {
      /* Dock on the pool barrier until gomp_team_start releases us.  */
      gomp_simple_barrier_wait (&pool->threads_dock);
      /* A wakeup with no work function set means this thread was not
	 selected for the new team; resume waiting.  */
      if (!thr->fn)
	continue;
      /* Run the outlined parallel-region function, then clear FN so the
	 next wakeup without an assignment is ignored.  */
      thr->fn (thr->data);
      thr->fn = NULL;

      /* Wait for the whole team at the final barrier, then tear down this
	 thread's implicit task.  */
      struct gomp_task *task = thr->task;
      gomp_team_barrier_wait_final (&thr->ts.team->barrier);
      gomp_finish_task (task);
    }
  /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
     it can trash stack pointer R1 in loops lacking exit edges.  Add a cheap
     artificial exit that the driver would not be able to optimize out.  */
  while (nvptx_thrs);
}
    117 
    118 /* Launch a team.  */
    119 
    120 void
    121 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    122 		 unsigned flags, struct gomp_team *team,
    123 		 struct gomp_taskgroup *taskgroup)
    124 {
    125   struct gomp_thread *thr, *nthr;
    126   struct gomp_task *task;
    127   struct gomp_task_icv *icv;
    128   struct gomp_thread_pool *pool;
    129   unsigned long nthreads_var;
    130 
    131   thr = gomp_thread ();
    132   pool = thr->thread_pool;
    133   task = thr->task;
    134   icv = task ? &task->icv : &gomp_global_icv;
    135 
    136   /* Always save the previous state, even if this isn't a nested team.
    137      In particular, we should save any work share state from an outer
    138      orphaned work share construct.  */
    139   team->prev_ts = thr->ts;
    140 
    141   thr->ts.team = team;
    142   thr->ts.team_id = 0;
    143   ++thr->ts.level;
    144   if (nthreads > 1)
    145     ++thr->ts.active_level;
    146   thr->ts.work_share = &team->work_shares[0];
    147   thr->ts.last_work_share = NULL;
    148   thr->ts.single_count = 0;
    149   thr->ts.static_trip = 0;
    150   thr->task = &team->implicit_task[0];
    151   nthreads_var = icv->nthreads_var;
    152   gomp_init_task (thr->task, task, icv);
    153   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    154   team->implicit_task[0].taskgroup = taskgroup;
    155 
    156   if (nthreads == 1)
    157     return;
    158 
    159   /* Release existing idle threads.  */
    160   for (unsigned i = 1; i < nthreads; ++i)
    161     {
    162       nthr = pool->threads[i];
    163       nthr->ts.team = team;
    164       nthr->ts.work_share = &team->work_shares[0];
    165       nthr->ts.last_work_share = NULL;
    166       nthr->ts.team_id = i;
    167       nthr->ts.level = team->prev_ts.level + 1;
    168       nthr->ts.active_level = thr->ts.active_level;
    169       nthr->ts.single_count = 0;
    170       nthr->ts.static_trip = 0;
    171       nthr->task = &team->implicit_task[i];
    172       gomp_init_task (nthr->task, task, icv);
    173       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    174       team->implicit_task[i].taskgroup = taskgroup;
    175       nthr->fn = fn;
    176       nthr->data = data;
    177       team->ordered_release[i] = &nthr->release;
    178     }
    179 
    180   gomp_simple_barrier_wait (&pool->threads_dock);
    181 }
    182 
int
gomp_pause_host (void)
{
  /* omp_pause_resource is not supported on this offload target; report
     failure unconditionally.  */
  return -1;
}
    188 
    189 #include "../../team.c"
    190 #endif
    191