1 1.1.1.8 mrg /* Copyright (C) 2015-2024 Free Software Foundation, Inc. 2 1.1 mrg Contributed by Alexander Monakov <amonakov (at) ispras.ru> 3 1.1 mrg 4 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library 5 1.1 mrg (libgomp). 6 1.1 mrg 7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published by 9 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 10 1.1 mrg any later version. 11 1.1 mrg 12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 1.1 mrg more details. 16 1.1 mrg 17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 18 1.1 mrg permissions described in the GCC Runtime Library Exception, version 19 1.1 mrg 3.1, as published by the Free Software Foundation. 20 1.1 mrg 21 1.1 mrg You should have received a copy of the GNU General Public License and 22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 1.1 mrg <http://www.gnu.org/licenses/>. */ 25 1.1 mrg 26 1.1 mrg /* This is the NVPTX implementation of doacross spinning. */ 27 1.1 mrg 28 1.1 mrg #ifndef GOMP_DOACROSS_H 29 1.1 mrg #define GOMP_DOACROSS_H 1 30 1.1 mrg 31 1.1 mrg #include "libgomp.h" 32 1.1 mrg 33 1.1 mrg static int zero; 34 1.1 mrg 35 1.1 mrg static inline int 36 1.1 mrg cpu_relax (void) 37 1.1 mrg { 38 1.1 mrg int r; 39 1.1 mrg /* Here we need a long-latency operation to make the current warp yield. 40 1.1 mrg We could use ld.cv, uncached load from system (host) memory, but that 41 1.1 mrg would require allocating locked memory in the plugin. Alternatively, 42 1.1 mrg we can use ld.cg, which evicts from L1 and caches in L2. */ 43 1.1 mrg asm volatile ("ld.cg.s32 %0, [%1];" : "=r" (r) : "i" (&zero) : "memory"); 44 1.1 mrg return r; 45 1.1 mrg } 46 1.1 mrg 47 1.1 mrg static inline void doacross_spin (unsigned long *addr, unsigned long expected, 48 1.1 mrg unsigned long cur) 49 1.1 mrg { 50 1.1 mrg /* Prevent compiler from optimizing based on bounds of containing object. */ 51 1.1 mrg asm ("" : "+r" (addr)); 52 1.1 mrg do 53 1.1 mrg { 54 1.1 mrg int i = cpu_relax (); 55 1.1 mrg cur = addr[i]; 56 1.1 mrg } 57 1.1 mrg while (cur <= expected); 58 1.1 mrg } 59 1.1 mrg 60 1.1 mrg #endif /* GOMP_DOACROSS_H */ 61