/* Copyright (C) 2015-2024 Free Software Foundation, Inc.
   Contributed by Alexander Monakov <amonakov (at) ispras.ru>

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
     25      1.1  mrg 
/* This is an NVPTX-specific implementation of a barrier synchronization
   mechanism for libgomp.  This type is private to the library.  This
   implementation uses atomic instructions and the bar.sync instruction.  */
     29      1.1  mrg 
#ifndef GOMP_BARRIER_H
#define GOMP_BARRIER_H 1

#include "mutex.h"  /* NOTE(review): presumably supplies the MEMMODEL_* constants used below — confirm.  */
     34      1.1  mrg 
/* Barrier state shared by all threads of a team.  */
typedef struct
{
  /* Number of threads participating in the barrier (set by init/reinit).  */
  unsigned total;
  /* Counter in the high bits plus BAR_* flag bits in the low bits;
     see the flag definitions below.  */
  unsigned generation;
  /* Number of threads not yet arrived; each arrival decrements it
     (gomp_barrier_wait_start), and it hits 0 for the last thread.  */
  unsigned awaited;
  /* Like AWAITED, but decremented only by the team-end barrier
     (gomp_barrier_wait_final_start).  */
  unsigned awaited_final;
} gomp_barrier_t;
     42      1.1  mrg 
/* Snapshot of the generation word returned by the *_wait_start functions.  */
typedef unsigned int gomp_barrier_state_t;

/* The generation field contains a counter in the high bits, with a few
   low bits dedicated to flags.  Note that TASK_PENDING and WAS_LAST can
   share space because WAS_LAST is never stored back to generation.  */
#define BAR_TASK_PENDING	1
#define BAR_WAS_LAST		1
#define BAR_WAITING_FOR_TASK	2
#define BAR_CANCELLED		4
#define BAR_INCR		8
     53      1.1  mrg 
     54      1.1  mrg static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
     55      1.1  mrg {
     56      1.1  mrg   bar->total = count;
     57      1.1  mrg   bar->awaited = count;
     58      1.1  mrg   bar->awaited_final = count;
     59      1.1  mrg   bar->generation = 0;
     60      1.1  mrg }
     61      1.1  mrg 
     62      1.1  mrg static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
     63      1.1  mrg {
     64      1.1  mrg   __atomic_add_fetch (&bar->awaited, count - bar->total, MEMMODEL_ACQ_REL);
     65      1.1  mrg   bar->total = count;
     66      1.1  mrg }
     67      1.1  mrg 
/* Destroy BAR.  Nothing to release on nvptx: the barrier holds no
   resources beyond its own storage, so this is a no-op.  */
static inline void gomp_barrier_destroy (gomp_barrier_t *bar)
{
}
     71      1.1  mrg 
/* Out-of-line barrier entry points; the _cancel variants return true
   when the barrier was cancelled.  Implementations live elsewhere in
   libgomp.  */
extern void gomp_barrier_wait (gomp_barrier_t *);
extern void gomp_barrier_wait_last (gomp_barrier_t *);
extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
extern void gomp_team_barrier_wait (gomp_barrier_t *);
extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
					gomp_barrier_state_t);
extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
					       gomp_barrier_state_t);
struct gomp_team;
extern void gomp_team_barrier_cancel (struct gomp_team *);
     84      1.1  mrg 
static inline void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  /* We never "wake up" threads on nvptx: threads block in the bar.sync
     instruction itself until the barrier is fulfilled, so there is
     nothing to do here.  */
}
     91  1.1.1.8  mrg 
     92      1.1  mrg static inline gomp_barrier_state_t
     93      1.1  mrg gomp_barrier_wait_start (gomp_barrier_t *bar)
     94      1.1  mrg {
     95      1.1  mrg   unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
     96      1.1  mrg   ret &= -BAR_INCR | BAR_CANCELLED;
     97      1.1  mrg   /* A memory barrier is needed before exiting from the various forms
     98      1.1  mrg      of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
     99      1.1  mrg      2.8.6 flush Construct, which says there is an implicit flush during
    100      1.1  mrg      a barrier region.  This is a convenient place to add the barrier,
    101      1.1  mrg      so we use MEMMODEL_ACQ_REL here rather than MEMMODEL_ACQUIRE.  */
    102      1.1  mrg   if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0)
    103      1.1  mrg     ret |= BAR_WAS_LAST;
    104      1.1  mrg   return ret;
    105      1.1  mrg }
    106      1.1  mrg 
    107      1.1  mrg static inline gomp_barrier_state_t
    108      1.1  mrg gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
    109      1.1  mrg {
    110      1.1  mrg   return gomp_barrier_wait_start (bar);
    111      1.1  mrg }
    112      1.1  mrg 
    113      1.1  mrg /* This is like gomp_barrier_wait_start, except it decrements
    114      1.1  mrg    bar->awaited_final rather than bar->awaited and should be used
    115      1.1  mrg    for the gomp_team_end barrier only.  */
    116      1.1  mrg static inline gomp_barrier_state_t
    117      1.1  mrg gomp_barrier_wait_final_start (gomp_barrier_t *bar)
    118      1.1  mrg {
    119      1.1  mrg   unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
    120      1.1  mrg   ret &= -BAR_INCR | BAR_CANCELLED;
    121      1.1  mrg   /* See above gomp_barrier_wait_start comment.  */
    122      1.1  mrg   if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_ACQ_REL) == 0)
    123      1.1  mrg     ret |= BAR_WAS_LAST;
    124      1.1  mrg   return ret;
    125      1.1  mrg }
    126      1.1  mrg 
    127      1.1  mrg static inline bool
    128      1.1  mrg gomp_barrier_last_thread (gomp_barrier_state_t state)
    129      1.1  mrg {
    130      1.1  mrg   return state & BAR_WAS_LAST;
    131      1.1  mrg }
    132      1.1  mrg 
    133      1.1  mrg /* All the inlines below must be called with team->task_lock
    134      1.1  mrg    held.  */
    135      1.1  mrg 
    136      1.1  mrg static inline void
    137      1.1  mrg gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
    138      1.1  mrg {
    139      1.1  mrg   bar->generation |= BAR_TASK_PENDING;
    140      1.1  mrg }
    141      1.1  mrg 
    142      1.1  mrg static inline void
    143      1.1  mrg gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
    144      1.1  mrg {
    145      1.1  mrg   bar->generation &= ~BAR_TASK_PENDING;
    146      1.1  mrg }
    147      1.1  mrg 
    148      1.1  mrg static inline void
    149      1.1  mrg gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
    150      1.1  mrg {
    151      1.1  mrg   bar->generation |= BAR_WAITING_FOR_TASK;
    152      1.1  mrg }
    153      1.1  mrg 
    154      1.1  mrg static inline bool
    155      1.1  mrg gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
    156      1.1  mrg {
    157      1.1  mrg   return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
    158      1.1  mrg }
    159      1.1  mrg 
    160      1.1  mrg static inline bool
    161      1.1  mrg gomp_team_barrier_cancelled (gomp_barrier_t *bar)
    162      1.1  mrg {
    163      1.1  mrg   return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
    164      1.1  mrg }
    165      1.1  mrg 
    166      1.1  mrg static inline void
    167      1.1  mrg gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
    168      1.1  mrg {
    169      1.1  mrg   bar->generation = (state & -BAR_INCR) + BAR_INCR;
    170      1.1  mrg }
    171      1.1  mrg 
#endif /* GOMP_BARRIER_H */
    173