Home | History | Annotate | Line # | Download | only in nvptx
team.c revision 1.1.1.3
      1  1.1.1.3  mrg /* Copyright (C) 2015-2018 Free Software Foundation, Inc.
      2  1.1.1.2  mrg    Contributed by Alexander Monakov <amonakov (at) ispras.ru>
      3  1.1.1.2  mrg 
      4  1.1.1.2  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5  1.1.1.2  mrg    (libgomp).
      6  1.1.1.2  mrg 
      7  1.1.1.2  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8  1.1.1.2  mrg    under the terms of the GNU General Public License as published by
      9  1.1.1.2  mrg    the Free Software Foundation; either version 3, or (at your option)
     10  1.1.1.2  mrg    any later version.
     11  1.1.1.2  mrg 
     12  1.1.1.2  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13  1.1.1.2  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14  1.1.1.2  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15  1.1.1.2  mrg    more details.
     16  1.1.1.2  mrg 
     17  1.1.1.2  mrg    Under Section 7 of GPL version 3, you are granted additional
     18  1.1.1.2  mrg    permissions described in the GCC Runtime Library Exception, version
     19  1.1.1.2  mrg    3.1, as published by the Free Software Foundation.
     20  1.1.1.2  mrg 
     21  1.1.1.2  mrg    You should have received a copy of the GNU General Public License and
     22  1.1.1.2  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23  1.1.1.2  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24  1.1.1.2  mrg    <http://www.gnu.org/licenses/>.  */
     25  1.1.1.2  mrg 
     26  1.1.1.2  mrg /* This file handles maintenance of threads on NVPTX.  */
     27  1.1.1.2  mrg 
     28  1.1.1.2  mrg #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
     29  1.1.1.2  mrg 
     30  1.1.1.2  mrg #include "libgomp.h"
     31  1.1.1.2  mrg #include <stdlib.h>
     32  1.1.1.2  mrg #include <string.h>
     33  1.1.1.2  mrg 
/* Base of the array of per-thread gomp_thread entries, declared with the
   "shared" attribute.  It is assigned in gomp_nvptx_main by the thread whose
   %tid.y is zero; the other threads read element 0 of it to find the thread
   pool.  */
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));

/* Idle loop entered from gomp_nvptx_main by every thread whose %tid.y is
   nonzero; defined further down in this file.  */
static void gomp_thread_start (struct gomp_thread_pool *);
     37  1.1.1.2  mrg 
     38  1.1.1.2  mrg 
     39  1.1.1.2  mrg /* This externally visible function handles target region entry.  It
     40  1.1.1.2  mrg    sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
     41  1.1.1.2  mrg    in the master thread or gomp_thread_start in other threads.
     42  1.1.1.2  mrg 
     43  1.1.1.2  mrg    The name of this function is part of the interface with the compiler: for
     44  1.1.1.2  mrg    each target region, GCC emits a PTX .kernel function that sets up soft-stack
     45  1.1.1.2  mrg    and uniform-simt state and calls this function, passing in FN the original
     46  1.1.1.2  mrg    function outlined for the target region.  */
     47  1.1.1.2  mrg 
     48  1.1.1.2  mrg void
     49  1.1.1.2  mrg gomp_nvptx_main (void (*fn) (void *), void *fn_data)
     50  1.1.1.2  mrg {
     51  1.1.1.2  mrg   int tid, ntids;
     52  1.1.1.2  mrg   asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
     53  1.1.1.2  mrg   asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
     54  1.1.1.2  mrg   if (tid == 0)
     55  1.1.1.2  mrg     {
     56  1.1.1.2  mrg       gomp_global_icv.nthreads_var = ntids;
     57  1.1.1.2  mrg       /* Starting additional threads is not supported.  */
     58  1.1.1.2  mrg       gomp_global_icv.dyn_var = true;
     59  1.1.1.2  mrg 
     60  1.1.1.2  mrg       nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
     61  1.1.1.2  mrg       memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
     62  1.1.1.2  mrg 
     63  1.1.1.2  mrg       struct gomp_thread_pool *pool = alloca (sizeof (*pool));
     64  1.1.1.2  mrg       pool->threads = alloca (ntids * sizeof (*pool->threads));
     65  1.1.1.2  mrg       for (tid = 0; tid < ntids; tid++)
     66  1.1.1.2  mrg 	pool->threads[tid] = nvptx_thrs + tid;
     67  1.1.1.2  mrg       pool->threads_size = ntids;
     68  1.1.1.2  mrg       pool->threads_used = ntids;
     69  1.1.1.2  mrg       pool->threads_busy = 1;
     70  1.1.1.2  mrg       pool->last_team = NULL;
     71  1.1.1.2  mrg       gomp_simple_barrier_init (&pool->threads_dock, ntids);
     72  1.1.1.2  mrg 
     73  1.1.1.2  mrg       nvptx_thrs[0].thread_pool = pool;
     74  1.1.1.2  mrg       asm ("bar.sync 0;");
     75  1.1.1.2  mrg       fn (fn_data);
     76  1.1.1.2  mrg 
     77  1.1.1.2  mrg       gomp_free_thread (nvptx_thrs);
     78  1.1.1.2  mrg     }
     79  1.1.1.2  mrg   else
     80  1.1.1.2  mrg     {
     81  1.1.1.2  mrg       asm ("bar.sync 0;");
     82  1.1.1.2  mrg       gomp_thread_start (nvptx_thrs[0].thread_pool);
     83  1.1.1.2  mrg     }
     84  1.1.1.2  mrg }
     85  1.1.1.2  mrg 
     86  1.1.1.2  mrg /* This function contains the idle loop in which a thread waits
     87  1.1.1.2  mrg    to be called up to become part of a team.  */
     88  1.1.1.2  mrg 
     89  1.1.1.2  mrg static void
     90  1.1.1.2  mrg gomp_thread_start (struct gomp_thread_pool *pool)
     91  1.1.1.2  mrg {
     92  1.1.1.2  mrg   struct gomp_thread *thr = gomp_thread ();
     93  1.1.1.2  mrg 
     94  1.1.1.2  mrg   gomp_sem_init (&thr->release, 0);
     95  1.1.1.2  mrg   thr->thread_pool = pool;
     96  1.1.1.2  mrg 
     97  1.1.1.2  mrg   do
     98  1.1.1.2  mrg     {
     99  1.1.1.2  mrg       gomp_simple_barrier_wait (&pool->threads_dock);
    100  1.1.1.2  mrg       if (!thr->fn)
    101  1.1.1.2  mrg 	continue;
    102  1.1.1.2  mrg       thr->fn (thr->data);
    103  1.1.1.2  mrg       thr->fn = NULL;
    104  1.1.1.2  mrg 
    105  1.1.1.2  mrg       struct gomp_task *task = thr->task;
    106  1.1.1.2  mrg       gomp_team_barrier_wait_final (&thr->ts.team->barrier);
    107  1.1.1.2  mrg       gomp_finish_task (task);
    108  1.1.1.2  mrg     }
    109  1.1.1.2  mrg   /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
    110  1.1.1.2  mrg      it can trash stack pointer R1 in loops lacking exit edges.  Add a cheap
    111  1.1.1.2  mrg      artificial exit that the driver would not be able to optimize out.  */
    112  1.1.1.2  mrg   while (nvptx_thrs);
    113  1.1.1.2  mrg }
    114  1.1.1.2  mrg 
    115  1.1.1.2  mrg /* Launch a team.  */
    116  1.1.1.2  mrg 
    117  1.1.1.2  mrg void
    118  1.1.1.2  mrg gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    119  1.1.1.2  mrg 		 unsigned flags, struct gomp_team *team)
    120  1.1.1.2  mrg {
    121  1.1.1.2  mrg   struct gomp_thread *thr, *nthr;
    122  1.1.1.2  mrg   struct gomp_task *task;
    123  1.1.1.2  mrg   struct gomp_task_icv *icv;
    124  1.1.1.2  mrg   struct gomp_thread_pool *pool;
    125  1.1.1.2  mrg   unsigned long nthreads_var;
    126  1.1.1.2  mrg 
    127  1.1.1.2  mrg   thr = gomp_thread ();
    128  1.1.1.2  mrg   pool = thr->thread_pool;
    129  1.1.1.2  mrg   task = thr->task;
    130  1.1.1.2  mrg   icv = task ? &task->icv : &gomp_global_icv;
    131  1.1.1.2  mrg 
    132  1.1.1.2  mrg   /* Always save the previous state, even if this isn't a nested team.
    133  1.1.1.2  mrg      In particular, we should save any work share state from an outer
    134  1.1.1.2  mrg      orphaned work share construct.  */
    135  1.1.1.2  mrg   team->prev_ts = thr->ts;
    136  1.1.1.2  mrg 
    137  1.1.1.2  mrg   thr->ts.team = team;
    138  1.1.1.2  mrg   thr->ts.team_id = 0;
    139  1.1.1.2  mrg   ++thr->ts.level;
    140  1.1.1.2  mrg   if (nthreads > 1)
    141  1.1.1.2  mrg     ++thr->ts.active_level;
    142  1.1.1.2  mrg   thr->ts.work_share = &team->work_shares[0];
    143  1.1.1.2  mrg   thr->ts.last_work_share = NULL;
    144  1.1.1.2  mrg   thr->ts.single_count = 0;
    145  1.1.1.2  mrg   thr->ts.static_trip = 0;
    146  1.1.1.2  mrg   thr->task = &team->implicit_task[0];
    147  1.1.1.2  mrg   nthreads_var = icv->nthreads_var;
    148  1.1.1.2  mrg   gomp_init_task (thr->task, task, icv);
    149  1.1.1.2  mrg   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    150  1.1.1.2  mrg 
    151  1.1.1.2  mrg   if (nthreads == 1)
    152  1.1.1.2  mrg     return;
    153  1.1.1.2  mrg 
    154  1.1.1.2  mrg   /* Release existing idle threads.  */
    155  1.1.1.2  mrg   for (unsigned i = 1; i < nthreads; ++i)
    156  1.1.1.2  mrg     {
    157  1.1.1.2  mrg       nthr = pool->threads[i];
    158  1.1.1.2  mrg       nthr->ts.team = team;
    159  1.1.1.2  mrg       nthr->ts.work_share = &team->work_shares[0];
    160  1.1.1.2  mrg       nthr->ts.last_work_share = NULL;
    161  1.1.1.2  mrg       nthr->ts.team_id = i;
    162  1.1.1.2  mrg       nthr->ts.level = team->prev_ts.level + 1;
    163  1.1.1.2  mrg       nthr->ts.active_level = thr->ts.active_level;
    164  1.1.1.2  mrg       nthr->ts.single_count = 0;
    165  1.1.1.2  mrg       nthr->ts.static_trip = 0;
    166  1.1.1.2  mrg       nthr->task = &team->implicit_task[i];
    167  1.1.1.2  mrg       gomp_init_task (nthr->task, task, icv);
    168  1.1.1.2  mrg       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    169  1.1.1.2  mrg       nthr->fn = fn;
    170  1.1.1.2  mrg       nthr->data = data;
    171  1.1.1.2  mrg       team->ordered_release[i] = &nthr->release;
    172  1.1.1.2  mrg     }
    173  1.1.1.2  mrg 
    174  1.1.1.2  mrg   gomp_simple_barrier_wait (&pool->threads_dock);
    175  1.1.1.2  mrg }
    176  1.1.1.2  mrg 
    177  1.1.1.2  mrg #include "../../team.c"
    178  1.1.1.2  mrg #endif
    179