Home | History | Annotate | Line # | Download | only in nvptx
team.c revision 1.1.1.1.4.1
      1  1.1.1.1.4.1  christos /* Copyright (C) 2015-2017 Free Software Foundation, Inc.
      2  1.1.1.1.4.1  christos    Contributed by Alexander Monakov <amonakov (at) ispras.ru>
      3  1.1.1.1.4.1  christos 
      4  1.1.1.1.4.1  christos    This file is part of the GNU Offloading and Multi Processing Library
      5  1.1.1.1.4.1  christos    (libgomp).
      6  1.1.1.1.4.1  christos 
      7  1.1.1.1.4.1  christos    Libgomp is free software; you can redistribute it and/or modify it
      8  1.1.1.1.4.1  christos    under the terms of the GNU General Public License as published by
      9  1.1.1.1.4.1  christos    the Free Software Foundation; either version 3, or (at your option)
     10  1.1.1.1.4.1  christos    any later version.
     11  1.1.1.1.4.1  christos 
     12  1.1.1.1.4.1  christos    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13  1.1.1.1.4.1  christos    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14  1.1.1.1.4.1  christos    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15  1.1.1.1.4.1  christos    more details.
     16  1.1.1.1.4.1  christos 
     17  1.1.1.1.4.1  christos    Under Section 7 of GPL version 3, you are granted additional
     18  1.1.1.1.4.1  christos    permissions described in the GCC Runtime Library Exception, version
     19  1.1.1.1.4.1  christos    3.1, as published by the Free Software Foundation.
     20  1.1.1.1.4.1  christos 
     21  1.1.1.1.4.1  christos    You should have received a copy of the GNU General Public License and
     22  1.1.1.1.4.1  christos    a copy of the GCC Runtime Library Exception along with this program;
     23  1.1.1.1.4.1  christos    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24  1.1.1.1.4.1  christos    <http://www.gnu.org/licenses/>.  */
     25  1.1.1.1.4.1  christos 
     26  1.1.1.1.4.1  christos /* This file handles maintenance of threads on NVPTX.  */
     27  1.1.1.1.4.1  christos 
     28  1.1.1.1.4.1  christos #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
     29  1.1.1.1.4.1  christos 
     30  1.1.1.1.4.1  christos #include "libgomp.h"
     31  1.1.1.1.4.1  christos #include <stdlib.h>
     32  1.1.1.1.4.1  christos #include <string.h>
     33  1.1.1.1.4.1  christos 
     34  1.1.1.1.4.1  christos struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));  /* Array of gomp_thread records; allocated and zeroed by thread 0 in gomp_nvptx_main, and also read as the idle-loop exit condition in gomp_thread_start.  */
     35  1.1.1.1.4.1  christos 
     36  1.1.1.1.4.1  christos static void gomp_thread_start (struct gomp_thread_pool *);  /* Forward declaration: gomp_nvptx_main calls this before its definition further down.  */
     37  1.1.1.1.4.1  christos 
     38  1.1.1.1.4.1  christos 
     39  1.1.1.1.4.1  christos /* This externally visible function handles target region entry.  It
     40  1.1.1.1.4.1  christos    sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
     41  1.1.1.1.4.1  christos    in the master thread or gomp_thread_start in other threads.
     42  1.1.1.1.4.1  christos 
     43  1.1.1.1.4.1  christos    The name of this function is part of the interface with the compiler: for
     44  1.1.1.1.4.1  christos    each target region, GCC emits a PTX .kernel function that sets up soft-stack
     45  1.1.1.1.4.1  christos    and uniform-simt state and calls this function, passing in FN the original
     46  1.1.1.1.4.1  christos    function outlined for the target region.  */
     47  1.1.1.1.4.1  christos 
     48  1.1.1.1.4.1  christos void
     49  1.1.1.1.4.1  christos gomp_nvptx_main (void (*fn) (void *), void *fn_data)
     50  1.1.1.1.4.1  christos {
     51  1.1.1.1.4.1  christos   int tid, ntids;
     52  1.1.1.1.4.1  christos   asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
     53  1.1.1.1.4.1  christos   asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
     54  1.1.1.1.4.1  christos   if (tid == 0)
     55  1.1.1.1.4.1  christos     {
     56  1.1.1.1.4.1  christos       gomp_global_icv.nthreads_var = ntids;
     57  1.1.1.1.4.1  christos       /* Starting additional threads is not supported.  */
     58  1.1.1.1.4.1  christos       gomp_global_icv.dyn_var = true;
     59  1.1.1.1.4.1  christos 
     60  1.1.1.1.4.1  christos       nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
     61  1.1.1.1.4.1  christos       memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
     62  1.1.1.1.4.1  christos 
     63  1.1.1.1.4.1  christos       struct gomp_thread_pool *pool = alloca (sizeof (*pool));
     64  1.1.1.1.4.1  christos       pool->threads = alloca (ntids * sizeof (*pool->threads));
     65  1.1.1.1.4.1  christos       for (tid = 0; tid < ntids; tid++)
     66  1.1.1.1.4.1  christos 	pool->threads[tid] = nvptx_thrs + tid;
     67  1.1.1.1.4.1  christos       pool->threads_size = ntids;
     68  1.1.1.1.4.1  christos       pool->threads_used = ntids;
     69  1.1.1.1.4.1  christos       pool->threads_busy = 1;
     70  1.1.1.1.4.1  christos       pool->last_team = NULL;
     71  1.1.1.1.4.1  christos       gomp_simple_barrier_init (&pool->threads_dock, ntids);
     72  1.1.1.1.4.1  christos 
     73  1.1.1.1.4.1  christos       nvptx_thrs[0].thread_pool = pool;
     74  1.1.1.1.4.1  christos       asm ("bar.sync 0;");
     75  1.1.1.1.4.1  christos       fn (fn_data);
     76  1.1.1.1.4.1  christos 
     77  1.1.1.1.4.1  christos       gomp_free_thread (nvptx_thrs);
     78  1.1.1.1.4.1  christos     }
     79  1.1.1.1.4.1  christos   else
     80  1.1.1.1.4.1  christos     {
     81  1.1.1.1.4.1  christos       asm ("bar.sync 0;");
     82  1.1.1.1.4.1  christos       gomp_thread_start (nvptx_thrs[0].thread_pool);
     83  1.1.1.1.4.1  christos     }
     84  1.1.1.1.4.1  christos }
     85  1.1.1.1.4.1  christos 
     86  1.1.1.1.4.1  christos /* This function contains the idle loop in which a thread waits
     87  1.1.1.1.4.1  christos    to be called up to become part of a team.  */
     88  1.1.1.1.4.1  christos 
     89  1.1.1.1.4.1  christos static void
     90  1.1.1.1.4.1  christos gomp_thread_start (struct gomp_thread_pool *pool)
     91  1.1.1.1.4.1  christos {
     92  1.1.1.1.4.1  christos   struct gomp_thread *thr = gomp_thread ();
     93  1.1.1.1.4.1  christos 
     94  1.1.1.1.4.1  christos   gomp_sem_init (&thr->release, 0);
     95  1.1.1.1.4.1  christos   thr->thread_pool = pool;
     96  1.1.1.1.4.1  christos 
     97  1.1.1.1.4.1  christos   do
     98  1.1.1.1.4.1  christos     {
     99  1.1.1.1.4.1  christos       gomp_simple_barrier_wait (&pool->threads_dock);
    100  1.1.1.1.4.1  christos       if (!thr->fn)
    101  1.1.1.1.4.1  christos 	continue;
    102  1.1.1.1.4.1  christos       thr->fn (thr->data);
    103  1.1.1.1.4.1  christos       thr->fn = NULL;
    104  1.1.1.1.4.1  christos 
    105  1.1.1.1.4.1  christos       struct gomp_task *task = thr->task;
    106  1.1.1.1.4.1  christos       gomp_team_barrier_wait_final (&thr->ts.team->barrier);
    107  1.1.1.1.4.1  christos       gomp_finish_task (task);
    108  1.1.1.1.4.1  christos     }
    109  1.1.1.1.4.1  christos   /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
    110  1.1.1.1.4.1  christos      it can trash stack pointer R1 in loops lacking exit edges.  Add a cheap
    111  1.1.1.1.4.1  christos      artificial exit that the driver would not be able to optimize out.  */
    112  1.1.1.1.4.1  christos   while (nvptx_thrs);
    113  1.1.1.1.4.1  christos }
    114  1.1.1.1.4.1  christos 
    115  1.1.1.1.4.1  christos /* Launch a team.  */
    116  1.1.1.1.4.1  christos 
    117  1.1.1.1.4.1  christos void
    118  1.1.1.1.4.1  christos gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    119  1.1.1.1.4.1  christos 		 unsigned flags, struct gomp_team *team)
    120  1.1.1.1.4.1  christos {
    121  1.1.1.1.4.1  christos   struct gomp_thread *thr, *nthr;
    122  1.1.1.1.4.1  christos   struct gomp_task *task;
    123  1.1.1.1.4.1  christos   struct gomp_task_icv *icv;
    124  1.1.1.1.4.1  christos   struct gomp_thread_pool *pool;
    125  1.1.1.1.4.1  christos   unsigned long nthreads_var;
    126  1.1.1.1.4.1  christos 
    127  1.1.1.1.4.1  christos   thr = gomp_thread ();
    128  1.1.1.1.4.1  christos   pool = thr->thread_pool;
    129  1.1.1.1.4.1  christos   task = thr->task;
    130  1.1.1.1.4.1  christos   icv = task ? &task->icv : &gomp_global_icv;
    131  1.1.1.1.4.1  christos 
    132  1.1.1.1.4.1  christos   /* Always save the previous state, even if this isn't a nested team.
    133  1.1.1.1.4.1  christos      In particular, we should save any work share state from an outer
    134  1.1.1.1.4.1  christos      orphaned work share construct.  */
    135  1.1.1.1.4.1  christos   team->prev_ts = thr->ts;
    136  1.1.1.1.4.1  christos 
    137  1.1.1.1.4.1  christos   thr->ts.team = team;
    138  1.1.1.1.4.1  christos   thr->ts.team_id = 0;
    139  1.1.1.1.4.1  christos   ++thr->ts.level;
    140  1.1.1.1.4.1  christos   if (nthreads > 1)
    141  1.1.1.1.4.1  christos     ++thr->ts.active_level;
    142  1.1.1.1.4.1  christos   thr->ts.work_share = &team->work_shares[0];
    143  1.1.1.1.4.1  christos   thr->ts.last_work_share = NULL;
    144  1.1.1.1.4.1  christos   thr->ts.single_count = 0;
    145  1.1.1.1.4.1  christos   thr->ts.static_trip = 0;
    146  1.1.1.1.4.1  christos   thr->task = &team->implicit_task[0];
    147  1.1.1.1.4.1  christos   nthreads_var = icv->nthreads_var;
    148  1.1.1.1.4.1  christos   gomp_init_task (thr->task, task, icv);
    149  1.1.1.1.4.1  christos   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    150  1.1.1.1.4.1  christos 
    151  1.1.1.1.4.1  christos   if (nthreads == 1)
    152  1.1.1.1.4.1  christos     return;
    153  1.1.1.1.4.1  christos 
    154  1.1.1.1.4.1  christos   /* Release existing idle threads.  */
    155  1.1.1.1.4.1  christos   for (unsigned i = 1; i < nthreads; ++i)
    156  1.1.1.1.4.1  christos     {
    157  1.1.1.1.4.1  christos       nthr = pool->threads[i];
    158  1.1.1.1.4.1  christos       nthr->ts.team = team;
    159  1.1.1.1.4.1  christos       nthr->ts.work_share = &team->work_shares[0];
    160  1.1.1.1.4.1  christos       nthr->ts.last_work_share = NULL;
    161  1.1.1.1.4.1  christos       nthr->ts.team_id = i;
    162  1.1.1.1.4.1  christos       nthr->ts.level = team->prev_ts.level + 1;
    163  1.1.1.1.4.1  christos       nthr->ts.active_level = thr->ts.active_level;
    164  1.1.1.1.4.1  christos       nthr->ts.single_count = 0;
    165  1.1.1.1.4.1  christos       nthr->ts.static_trip = 0;
    166  1.1.1.1.4.1  christos       nthr->task = &team->implicit_task[i];
    167  1.1.1.1.4.1  christos       gomp_init_task (nthr->task, task, icv);
    168  1.1.1.1.4.1  christos       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    169  1.1.1.1.4.1  christos       nthr->fn = fn;
    170  1.1.1.1.4.1  christos       nthr->data = data;
    171  1.1.1.1.4.1  christos       team->ordered_release[i] = &nthr->release;
    172  1.1.1.1.4.1  christos     }
    173  1.1.1.1.4.1  christos 
    174  1.1.1.1.4.1  christos   gomp_simple_barrier_wait (&pool->threads_dock);
    175  1.1.1.1.4.1  christos }
    176  1.1.1.1.4.1  christos 
    177  1.1.1.1.4.1  christos #include "../../team.c"
    178  1.1.1.1.4.1  christos #endif
    179