/* Copyright (C) 2015-2022 Free Software Foundation, Inc.
   Contributed by Alexander Monakov <amonakov (at) ispras.ru>

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles maintenance of threads on NVPTX.  */

#if defined __nvptx_softstack__ && defined __nvptx_unisimt__

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
int __gomp_team_num __attribute__((shared,nocommon));

static void gomp_thread_start (struct gomp_thread_pool *);


/* This externally visible function handles target region entry.  It
   sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
   in the master thread or gomp_thread_start in other threads.

   The name of this function is part of the interface with the compiler: for
   each target region, GCC emits a PTX .kernel function that sets up soft-stack
   and uniform-simt state and calls this function, passing in FN the original
   function outlined for the target region.  */
void
gomp_nvptx_main (void (*fn) (void *), void *fn_data)
{
  int tid, ntids;
  /* Read this thread's index and the thread-block extent along the Y
     dimension; the Y axis is where OpenMP threads are laid out here.  */
  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
  asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
  if (tid == 0)
    {
      /* Master thread: fix the global ICVs to the number of hardware
	 threads actually launched for this team.  */
      gomp_global_icv.nthreads_var = ntids;
      gomp_global_icv.thread_limit_var = ntids;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      __gomp_team_num = 0;
      /* Per-thread descriptors and the pool live on the master's
	 soft stack via alloca; this frame stays live until FN returns,
	 so the other threads can safely reference them.  */
      nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
      memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));

      struct gomp_thread_pool *pool = alloca (sizeof (*pool));
      pool->threads = alloca (ntids * sizeof (*pool->threads));
      for (tid = 0; tid < ntids; tid++)
	pool->threads[tid] = nvptx_thrs + tid;
      pool->threads_size = ntids;
      pool->threads_used = ntids;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, ntids);

      nvptx_thrs[0].thread_pool = pool;
      /* Publish the pool: release the other threads waiting on the
	 matching bar.sync in the else branch below.  */
      asm ("bar.sync 0;");
      fn (fn_data);

      gomp_free_thread (nvptx_thrs);
    }
  else
    {
      /* Wait until the master has set up nvptx_thrs and the pool.  */
      asm ("bar.sync 0;");
      gomp_thread_start (nvptx_thrs[0].thread_pool);
    }
}

/* This function contains the idle loop in which a thread waits
   to be called up to become part of a team.  */

static void
gomp_thread_start (struct gomp_thread_pool *pool)
{
  struct gomp_thread *thr = gomp_thread ();

  gomp_sem_init (&thr->release, 0);
  thr->thread_pool = pool;

  /* Idle loop: wait at the pool's dock until gomp_team_start hands this
     thread a function, run it, then return to the dock.  */
  do
    {
      gomp_simple_barrier_wait (&pool->threads_dock);
      /* A dock release without a function assigned means this thread is
	 not part of the new team; just wait again.  */
      if (!thr->fn)
	continue;
      thr->fn (thr->data);
      thr->fn = NULL;

      /* Capture the implicit task pointer before the team barrier, then
	 finish it once all team members have arrived.  */
      struct gomp_task *task = thr->task;
      gomp_team_barrier_wait_final (&thr->ts.team->barrier);
      gomp_finish_task (task);
    }
  /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
     it can trash stack pointer R1 in loops lacking exit edges.  Add a cheap
     artificial exit that the driver would not be able to optimize out.  */
  while (nvptx_thrs);
}

/* Launch a team.  */

    120  1.3  mrg void
    121  1.3  mrg gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    122  1.5  mrg 		 unsigned flags, struct gomp_team *team,
    123  1.5  mrg 		 struct gomp_taskgroup *taskgroup)
    124  1.3  mrg {
    125  1.3  mrg   struct gomp_thread *thr, *nthr;
    126  1.3  mrg   struct gomp_task *task;
    127  1.3  mrg   struct gomp_task_icv *icv;
    128  1.3  mrg   struct gomp_thread_pool *pool;
    129  1.3  mrg   unsigned long nthreads_var;
    130  1.3  mrg 
    131  1.3  mrg   thr = gomp_thread ();
    132  1.3  mrg   pool = thr->thread_pool;
    133  1.3  mrg   task = thr->task;
    134  1.3  mrg   icv = task ? &task->icv : &gomp_global_icv;
    135  1.3  mrg 
    136  1.3  mrg   /* Always save the previous state, even if this isn't a nested team.
    137  1.3  mrg      In particular, we should save any work share state from an outer
    138  1.3  mrg      orphaned work share construct.  */
    139  1.3  mrg   team->prev_ts = thr->ts;
    140  1.3  mrg 
    141  1.3  mrg   thr->ts.team = team;
    142  1.3  mrg   thr->ts.team_id = 0;
    143  1.3  mrg   ++thr->ts.level;
    144  1.3  mrg   if (nthreads > 1)
    145  1.3  mrg     ++thr->ts.active_level;
    146  1.3  mrg   thr->ts.work_share = &team->work_shares[0];
    147  1.3  mrg   thr->ts.last_work_share = NULL;
    148  1.3  mrg   thr->ts.single_count = 0;
    149  1.3  mrg   thr->ts.static_trip = 0;
    150  1.3  mrg   thr->task = &team->implicit_task[0];
    151  1.3  mrg   nthreads_var = icv->nthreads_var;
    152  1.3  mrg   gomp_init_task (thr->task, task, icv);
    153  1.3  mrg   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    154  1.5  mrg   team->implicit_task[0].taskgroup = taskgroup;
    155  1.3  mrg 
    156  1.3  mrg   if (nthreads == 1)
    157  1.3  mrg     return;
    158  1.3  mrg 
    159  1.3  mrg   /* Release existing idle threads.  */
    160  1.3  mrg   for (unsigned i = 1; i < nthreads; ++i)
    161  1.3  mrg     {
    162  1.3  mrg       nthr = pool->threads[i];
    163  1.3  mrg       nthr->ts.team = team;
    164  1.3  mrg       nthr->ts.work_share = &team->work_shares[0];
    165  1.3  mrg       nthr->ts.last_work_share = NULL;
    166  1.3  mrg       nthr->ts.team_id = i;
    167  1.3  mrg       nthr->ts.level = team->prev_ts.level + 1;
    168  1.3  mrg       nthr->ts.active_level = thr->ts.active_level;
    169  1.3  mrg       nthr->ts.single_count = 0;
    170  1.3  mrg       nthr->ts.static_trip = 0;
    171  1.3  mrg       nthr->task = &team->implicit_task[i];
    172  1.3  mrg       gomp_init_task (nthr->task, task, icv);
    173  1.3  mrg       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    174  1.5  mrg       team->implicit_task[i].taskgroup = taskgroup;
    175  1.3  mrg       nthr->fn = fn;
    176  1.3  mrg       nthr->data = data;
    177  1.3  mrg       team->ordered_release[i] = &nthr->release;
    178  1.3  mrg     }
    179  1.3  mrg 
    180  1.3  mrg   gomp_simple_barrier_wait (&pool->threads_dock);
    181  1.3  mrg }
/* NOTE(review): presumably backs omp_pause_resource on the device side;
   unconditionally reports failure (-1), i.e. pausing is unsupported here.  */

int
gomp_pause_host (void)
{
  const int unsupported = -1;
  return unsupported;
}

#include "../../team.c"
#endif