Home | History | Annotate | Line # | Download | only in nvptx
      1  1.1.1.8  mrg /* Copyright (C) 2015-2024 Free Software Foundation, Inc.
      2      1.1  mrg    Contributed by Alexander Monakov <amonakov (at) ispras.ru>
      3      1.1  mrg 
      4      1.1  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5      1.1  mrg    (libgomp).
      6      1.1  mrg 
      7      1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8      1.1  mrg    under the terms of the GNU General Public License as published by
      9      1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10      1.1  mrg    any later version.
     11      1.1  mrg 
     12      1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13      1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14      1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15      1.1  mrg    more details.
     16      1.1  mrg 
     17      1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18      1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19      1.1  mrg    3.1, as published by the Free Software Foundation.
     20      1.1  mrg 
     21      1.1  mrg    You should have received a copy of the GNU General Public License and
     22      1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23      1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24      1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25      1.1  mrg 
     26      1.1  mrg /* This is the NVPTX implementation of doacross spinning.  */
     27      1.1  mrg 
     28      1.1  mrg #ifndef GOMP_DOACROSS_H
     29      1.1  mrg #define GOMP_DOACROSS_H 1
     30      1.1  mrg 
     31      1.1  mrg #include "libgomp.h"
     32      1.1  mrg 
     33      1.1  mrg static int zero;
     34      1.1  mrg 
     35      1.1  mrg static inline int
     36      1.1  mrg cpu_relax (void)
     37      1.1  mrg {
     38      1.1  mrg   int r;
     39      1.1  mrg   /* Here we need a long-latency operation to make the current warp yield.
     40      1.1  mrg      We could use ld.cv, uncached load from system (host) memory, but that
     41      1.1  mrg      would require allocating locked memory in the plugin.  Alternatively,
     42      1.1  mrg      we can use ld.cg, which evicts from L1 and caches in L2.  */
     43      1.1  mrg   asm volatile ("ld.cg.s32 %0, [%1];" : "=r" (r) : "i" (&zero) : "memory");
     44      1.1  mrg   return r;
     45      1.1  mrg }
     46      1.1  mrg 
     47      1.1  mrg static inline void doacross_spin (unsigned long *addr, unsigned long expected,
     48      1.1  mrg 				  unsigned long cur)
     49      1.1  mrg {
     50      1.1  mrg   /* Prevent compiler from optimizing based on bounds of containing object.  */
     51      1.1  mrg   asm ("" : "+r" (addr));
     52      1.1  mrg   do
     53      1.1  mrg     {
     54      1.1  mrg       int i = cpu_relax ();
     55      1.1  mrg       cur = addr[i];
     56      1.1  mrg     }
     57      1.1  mrg   while (cur <= expected);
     58      1.1  mrg }
     59      1.1  mrg 
     60      1.1  mrg #endif /* GOMP_DOACROSS_H */
     61