Home | History | Annotate | Line # | Download | only in nvptx
team.c revision 1.1.1.1.4.3
      1  1.1.1.1.4.3  martin /* Copyright (C) 2015-2017 Free Software Foundation, Inc.
      2  1.1.1.1.4.3  martin    Contributed by Alexander Monakov <amonakov (at) ispras.ru>
      3  1.1.1.1.4.3  martin 
      4  1.1.1.1.4.3  martin    This file is part of the GNU Offloading and Multi Processing Library
      5  1.1.1.1.4.3  martin    (libgomp).
      6  1.1.1.1.4.3  martin 
      7  1.1.1.1.4.3  martin    Libgomp is free software; you can redistribute it and/or modify it
      8  1.1.1.1.4.3  martin    under the terms of the GNU General Public License as published by
      9  1.1.1.1.4.3  martin    the Free Software Foundation; either version 3, or (at your option)
     10  1.1.1.1.4.3  martin    any later version.
     11  1.1.1.1.4.3  martin 
     12  1.1.1.1.4.3  martin    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13  1.1.1.1.4.3  martin    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14  1.1.1.1.4.3  martin    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15  1.1.1.1.4.3  martin    more details.
     16  1.1.1.1.4.3  martin 
     17  1.1.1.1.4.3  martin    Under Section 7 of GPL version 3, you are granted additional
     18  1.1.1.1.4.3  martin    permissions described in the GCC Runtime Library Exception, version
     19  1.1.1.1.4.3  martin    3.1, as published by the Free Software Foundation.
     20  1.1.1.1.4.3  martin 
     21  1.1.1.1.4.3  martin    You should have received a copy of the GNU General Public License and
     22  1.1.1.1.4.3  martin    a copy of the GCC Runtime Library Exception along with this program;
     23  1.1.1.1.4.3  martin    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24  1.1.1.1.4.3  martin    <http://www.gnu.org/licenses/>.  */
     25  1.1.1.1.4.3  martin 
     26  1.1.1.1.4.3  martin /* This file handles maintenance of threads on NVPTX.  */
     27  1.1.1.1.4.3  martin 
     28  1.1.1.1.4.3  martin #if defined __nvptx_softstack__ && defined __nvptx_unisimt__
     29  1.1.1.1.4.3  martin 
     30  1.1.1.1.4.3  martin #include "libgomp.h"
     31  1.1.1.1.4.3  martin #include <stdlib.h>
     32  1.1.1.1.4.3  martin #include <string.h>
     33  1.1.1.1.4.3  martin 
     34  1.1.1.1.4.3  martin struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
/* nvptx_thrs: base of the gomp_thread array.  gomp_nvptx_main assigns it
   in the thread whose %tid.y is 0 (an alloca'd, zero-filled array with one
   entry per %ntid.y thread); the other threads read it only after the
   "bar.sync 0" that follows.  gomp_thread_start also uses it as its loop
   condition.
   NOTE(review): the `shared' attribute presumably places the pointer in
   PTX .shared memory so that all threads of the block see one copy --
   confirm against the GCC nvptx attribute documentation.  */
     35  1.1.1.1.4.3  martin 
     36  1.1.1.1.4.3  martin static void gomp_thread_start (struct gomp_thread_pool *);
/* gomp_thread_start is forward-declared because gomp_nvptx_main calls it
   before its definition appears in this file.  */
     37  1.1.1.1.4.3  martin 
     38  1.1.1.1.4.3  martin 
     39  1.1.1.1.4.3  martin /* This externally visible function handles target region entry.  It
     40  1.1.1.1.4.3  martin    sets up a per-team thread pool and transfers control by calling FN (FN_DATA)
     41  1.1.1.1.4.3  martin    in the master thread or gomp_thread_start in other threads.
     42  1.1.1.1.4.3  martin 
     43  1.1.1.1.4.3  martin    The name of this function is part of the interface with the compiler: for
     44  1.1.1.1.4.3  martin    each target region, GCC emits a PTX .kernel function that sets up soft-stack
     45  1.1.1.1.4.3  martin    and uniform-simt state and calls this function, passing in FN the original
     46  1.1.1.1.4.3  martin    function outlined for the target region.  */
     47  1.1.1.1.4.3  martin 
     48  1.1.1.1.4.3  martin void
     49  1.1.1.1.4.3  martin gomp_nvptx_main (void (*fn) (void *), void *fn_data)
     50  1.1.1.1.4.3  martin {
     51  1.1.1.1.4.3  martin   int tid, ntids;
     52  1.1.1.1.4.3  martin   asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
     53  1.1.1.1.4.3  martin   asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
     54  1.1.1.1.4.3  martin   if (tid == 0)
     55  1.1.1.1.4.3  martin     {
     56  1.1.1.1.4.3  martin       gomp_global_icv.nthreads_var = ntids;
     57  1.1.1.1.4.3  martin       /* Starting additional threads is not supported.  */
     58  1.1.1.1.4.3  martin       gomp_global_icv.dyn_var = true;
     59  1.1.1.1.4.3  martin 
     60  1.1.1.1.4.3  martin       nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
     61  1.1.1.1.4.3  martin       memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
     62  1.1.1.1.4.3  martin 
     63  1.1.1.1.4.3  martin       struct gomp_thread_pool *pool = alloca (sizeof (*pool));
     64  1.1.1.1.4.3  martin       pool->threads = alloca (ntids * sizeof (*pool->threads));
     65  1.1.1.1.4.3  martin       for (tid = 0; tid < ntids; tid++)
     66  1.1.1.1.4.3  martin 	pool->threads[tid] = nvptx_thrs + tid;
     67  1.1.1.1.4.3  martin       pool->threads_size = ntids;
     68  1.1.1.1.4.3  martin       pool->threads_used = ntids;
     69  1.1.1.1.4.3  martin       pool->threads_busy = 1;
     70  1.1.1.1.4.3  martin       pool->last_team = NULL;
     71  1.1.1.1.4.3  martin       gomp_simple_barrier_init (&pool->threads_dock, ntids);
     72  1.1.1.1.4.3  martin 
     73  1.1.1.1.4.3  martin       nvptx_thrs[0].thread_pool = pool;
     74  1.1.1.1.4.3  martin       asm ("bar.sync 0;");
     75  1.1.1.1.4.3  martin       fn (fn_data);
     76  1.1.1.1.4.3  martin 
     77  1.1.1.1.4.3  martin       gomp_free_thread (nvptx_thrs);
     78  1.1.1.1.4.3  martin     }
     79  1.1.1.1.4.3  martin   else
     80  1.1.1.1.4.3  martin     {
     81  1.1.1.1.4.3  martin       asm ("bar.sync 0;");
     82  1.1.1.1.4.3  martin       gomp_thread_start (nvptx_thrs[0].thread_pool);
     83  1.1.1.1.4.3  martin     }
     84  1.1.1.1.4.3  martin }
     85  1.1.1.1.4.3  martin 
     86  1.1.1.1.4.3  martin /* This function contains the idle loop in which a thread waits
     87  1.1.1.1.4.3  martin    to be called up to become part of a team.  */
     88  1.1.1.1.4.3  martin 
     89  1.1.1.1.4.3  martin static void
     90  1.1.1.1.4.3  martin gomp_thread_start (struct gomp_thread_pool *pool)
     91  1.1.1.1.4.3  martin {
     92  1.1.1.1.4.3  martin   struct gomp_thread *thr = gomp_thread ();
     93  1.1.1.1.4.3  martin 
     94  1.1.1.1.4.3  martin   gomp_sem_init (&thr->release, 0);
     95  1.1.1.1.4.3  martin   thr->thread_pool = pool;
     96  1.1.1.1.4.3  martin 
     97  1.1.1.1.4.3  martin   do
     98  1.1.1.1.4.3  martin     {
     99  1.1.1.1.4.3  martin       gomp_simple_barrier_wait (&pool->threads_dock);
    100  1.1.1.1.4.3  martin       if (!thr->fn)
    101  1.1.1.1.4.3  martin 	continue;
    102  1.1.1.1.4.3  martin       thr->fn (thr->data);
    103  1.1.1.1.4.3  martin       thr->fn = NULL;
    104  1.1.1.1.4.3  martin 
    105  1.1.1.1.4.3  martin       struct gomp_task *task = thr->task;
    106  1.1.1.1.4.3  martin       gomp_team_barrier_wait_final (&thr->ts.team->barrier);
    107  1.1.1.1.4.3  martin       gomp_finish_task (task);
    108  1.1.1.1.4.3  martin     }
    109  1.1.1.1.4.3  martin   /* Work around an NVIDIA driver bug: when generating sm_50 machine code,
    110  1.1.1.1.4.3  martin      it can trash stack pointer R1 in loops lacking exit edges.  Add a cheap
    111  1.1.1.1.4.3  martin      artificial exit that the driver would not be able to optimize out.  */
    112  1.1.1.1.4.3  martin   while (nvptx_thrs);
    113  1.1.1.1.4.3  martin }
    114  1.1.1.1.4.3  martin 
    115  1.1.1.1.4.3  martin /* Launch a team.  */
    116  1.1.1.1.4.3  martin 
    117  1.1.1.1.4.3  martin void
    118  1.1.1.1.4.3  martin gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
    119  1.1.1.1.4.3  martin 		 unsigned flags, struct gomp_team *team)
    120  1.1.1.1.4.3  martin {
    121  1.1.1.1.4.3  martin   struct gomp_thread *thr, *nthr;
    122  1.1.1.1.4.3  martin   struct gomp_task *task;
    123  1.1.1.1.4.3  martin   struct gomp_task_icv *icv;
    124  1.1.1.1.4.3  martin   struct gomp_thread_pool *pool;
    125  1.1.1.1.4.3  martin   unsigned long nthreads_var;
    126  1.1.1.1.4.3  martin 
    127  1.1.1.1.4.3  martin   thr = gomp_thread ();
    128  1.1.1.1.4.3  martin   pool = thr->thread_pool;
    129  1.1.1.1.4.3  martin   task = thr->task;
    130  1.1.1.1.4.3  martin   icv = task ? &task->icv : &gomp_global_icv;
    131  1.1.1.1.4.3  martin 
    132  1.1.1.1.4.3  martin   /* Always save the previous state, even if this isn't a nested team.
    133  1.1.1.1.4.3  martin      In particular, we should save any work share state from an outer
    134  1.1.1.1.4.3  martin      orphaned work share construct.  */
    135  1.1.1.1.4.3  martin   team->prev_ts = thr->ts;
    136  1.1.1.1.4.3  martin 
    137  1.1.1.1.4.3  martin   thr->ts.team = team;
    138  1.1.1.1.4.3  martin   thr->ts.team_id = 0;
    139  1.1.1.1.4.3  martin   ++thr->ts.level;
    140  1.1.1.1.4.3  martin   if (nthreads > 1)
    141  1.1.1.1.4.3  martin     ++thr->ts.active_level;
    142  1.1.1.1.4.3  martin   thr->ts.work_share = &team->work_shares[0];
    143  1.1.1.1.4.3  martin   thr->ts.last_work_share = NULL;
    144  1.1.1.1.4.3  martin   thr->ts.single_count = 0;
    145  1.1.1.1.4.3  martin   thr->ts.static_trip = 0;
    146  1.1.1.1.4.3  martin   thr->task = &team->implicit_task[0];
    147  1.1.1.1.4.3  martin   nthreads_var = icv->nthreads_var;
    148  1.1.1.1.4.3  martin   gomp_init_task (thr->task, task, icv);
    149  1.1.1.1.4.3  martin   team->implicit_task[0].icv.nthreads_var = nthreads_var;
    150  1.1.1.1.4.3  martin 
    151  1.1.1.1.4.3  martin   if (nthreads == 1)
    152  1.1.1.1.4.3  martin     return;
    153  1.1.1.1.4.3  martin 
    154  1.1.1.1.4.3  martin   /* Release existing idle threads.  */
    155  1.1.1.1.4.3  martin   for (unsigned i = 1; i < nthreads; ++i)
    156  1.1.1.1.4.3  martin     {
    157  1.1.1.1.4.3  martin       nthr = pool->threads[i];
    158  1.1.1.1.4.3  martin       nthr->ts.team = team;
    159  1.1.1.1.4.3  martin       nthr->ts.work_share = &team->work_shares[0];
    160  1.1.1.1.4.3  martin       nthr->ts.last_work_share = NULL;
    161  1.1.1.1.4.3  martin       nthr->ts.team_id = i;
    162  1.1.1.1.4.3  martin       nthr->ts.level = team->prev_ts.level + 1;
    163  1.1.1.1.4.3  martin       nthr->ts.active_level = thr->ts.active_level;
    164  1.1.1.1.4.3  martin       nthr->ts.single_count = 0;
    165  1.1.1.1.4.3  martin       nthr->ts.static_trip = 0;
    166  1.1.1.1.4.3  martin       nthr->task = &team->implicit_task[i];
    167  1.1.1.1.4.3  martin       gomp_init_task (nthr->task, task, icv);
    168  1.1.1.1.4.3  martin       team->implicit_task[i].icv.nthreads_var = nthreads_var;
    169  1.1.1.1.4.3  martin       nthr->fn = fn;
    170  1.1.1.1.4.3  martin       nthr->data = data;
    171  1.1.1.1.4.3  martin       team->ordered_release[i] = &nthr->release;
    172  1.1.1.1.4.3  martin     }
    173  1.1.1.1.4.3  martin 
    174  1.1.1.1.4.3  martin   gomp_simple_barrier_wait (&pool->threads_dock);
    175  1.1.1.1.4.3  martin }
    176  1.1.1.1.4.3  martin 
    177  1.1.1.1.4.3  martin #include "../../team.c"
    178  1.1.1.1.4.3  martin #endif
    179