1/**************************************************************************** 2 * Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ****************************************************************************/ 23 24#ifndef __SWR_OS_H__ 25#define __SWR_OS_H__ 26 27#include <cstddef> 28#include "core/knobs.h" 29 30#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX) 31 32#define SWR_API __cdecl 33#define SWR_VISIBLE __declspec(dllexport) 34 35#ifndef NOMINMAX 36#define NOMINMAX 37#include <windows.h> 38#undef NOMINMAX 39#else 40#include <windows.h> 41#endif 42#include <intrin.h> 43#include <cstdint> 44 45#if defined(MemoryFence) 46// Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence 47#undef MemoryFence 48#endif 49 50#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD 51 52#if defined(_DEBUG) 53// We compile Debug builds with inline function expansion enabled. This allows 54// functions compiled with __forceinline to be inlined even in Debug builds. 55// The inline_depth(0) pragma below will disable inline function expansion for 56// normal INLINE / inline functions, but not for __forceinline functions. 57// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in 58// Debug builds. 59#define INLINE inline 60#pragma inline_depth(0) 61#else 62// Use of __forceinline increases compile time dramatically in release builds 63// and provides almost 0 measurable benefit. Disable until we have a compelling 64// use-case 65// #define INLINE __forceinline 66#define INLINE inline 67#endif 68#ifndef FORCEINLINE 69#define FORCEINLINE __forceinline 70#endif 71 72#define DEBUGBREAK __debugbreak() 73 74#define PRAGMA_WARNING_PUSH_DISABLE(...) \ 75 __pragma(warning(push)); \ 76 __pragma(warning(disable : __VA_ARGS__)); 77 78#define PRAGMA_WARNING_POP() __pragma(warning(pop)) 79 80static inline void* AlignedMalloc(size_t _Size, size_t _Alignment) 81{ 82 return _aligned_malloc(_Size, _Alignment); 83} 84 85static inline void AlignedFree(void* p) 86{ 87 return _aligned_free(p); 88} 89 90#if defined(_WIN64) 91#define BitScanReverseSizeT BitScanReverse64 92#define BitScanForwardSizeT BitScanForward64 93#define _mm_popcount_sizeT _mm_popcnt_u64 94#else 95#define BitScanReverseSizeT BitScanReverse 96#define BitScanForwardSizeT BitScanForward 97#define _mm_popcount_sizeT _mm_popcnt_u32 98#endif 99 100#elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__) 101 102#define SWR_API 103#define SWR_VISIBLE __attribute__((visibility("default"))) 104 105#include <stdlib.h> 106#include <string.h> 107#include <x86intrin.h> 108#include <stdint.h> 109#include <sys/types.h> 110#include <unistd.h> 111#include <sys/stat.h> 112#include <stdio.h> 113#include <limits.h> 114 115typedef void VOID; 116typedef void* LPVOID; 117typedef int INT; 118typedef unsigned int UINT; 119typedef void* HANDLE; 120typedef int LONG; 121typedef unsigned int DWORD; 122 123#undef FALSE 124#define FALSE 0 125 126#undef TRUE 127#define TRUE 1 128 129#define MAX_PATH PATH_MAX 130 131#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH))) 132#ifndef INLINE 133#define INLINE __inline 134#endif 135#ifndef FORCEINLINE 136#define FORCEINLINE INLINE 137#endif 138#define DEBUGBREAK asm("int $3") 139 140#if !defined(__CYGWIN__) 141 142#ifndef __cdecl 143#define __cdecl 144#endif 145#ifndef __stdcall 146#define __stdcall 147#endif 148 149#if defined(__GNUC__) && !defined(__INTEL_COMPILER) 150#define __declspec(x) __declspec_##x 151#define __declspec_align(y) __attribute__((aligned(y))) 152#define __declspec_deprecated __attribute__((deprecated)) 153#define __declspec_dllexport 154#define __declspec_dllimport 155#define __declspec_noinline __attribute__((__noinline__)) 156#define __declspec_nothrow __attribute__((nothrow)) 157#define __declspec_novtable 158#define __declspec_thread __thread 159#else 160#define __declspec(X) 161#endif 162 163#endif 164 165#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) 166 167#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500) 168inline uint64_t __rdtsc() 169{ 170 long low, high; 171 asm volatile("rdtsc" : "=a"(low), "=d"(high)); 172 return (low | ((uint64_t)high << 32)); 173} 174#endif 175 176#if !defined(__clang__) && !defined(__INTEL_COMPILER) 177// Intrinsic not defined in gcc 178static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a) 179{ 180 _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a)); 181 _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1)); 182} 183 184// gcc prior to 4.9 doesn't have _mm*_undefined_* 185#if (__GNUC__) && (GCC_VERSION < 409000) 186#define _mm_undefined_si128 _mm_setzero_si128 187#define _mm256_undefined_ps _mm256_setzero_ps 188#endif 189#endif 190 191inline unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask) 192{ 193 *Index = __builtin_ctz(Mask); 194 return (Mask != 0); 195} 196 197inline unsigned char _BitScanForward(unsigned int* Index, unsigned int Mask) 198{ 199 *Index = __builtin_ctz(Mask); 200 return (Mask != 0); 201} 202 203inline unsigned char _BitScanReverse(unsigned long* Index, unsigned long Mask) 204{ 205 *Index = 63 - __builtin_clz(Mask); 206 return (Mask != 0); 207} 208 209inline unsigned char _BitScanReverse(unsigned int* Index, unsigned int Mask) 210{ 211 *Index = 31 - __builtin_clz(Mask); 212 return (Mask != 0); 213} 214 215#define _BitScanForward64 _BitScanForward 216#define _BitScanReverse64 _BitScanReverse 217 218inline void* AlignedMalloc(size_t size, size_t alignment) 219{ 220 void* ret; 221 if (posix_memalign(&ret, alignment, size)) 222 { 223 return NULL; 224 } 225 return ret; 226} 227 228static inline void AlignedFree(void* p) 229{ 230 free(p); 231} 232 233#define _countof(a) (sizeof(a) / sizeof(*(a))) 234 235#define sprintf_s sprintf 236#define strcpy_s(dst, size, src) strncpy(dst, src, size) 237#define GetCurrentProcessId getpid 238 239#define InterlockedCompareExchange(Dest, Exchange, Comparand) \ 240 __sync_val_compare_and_swap(Dest, Comparand, Exchange) 241#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value) 242#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1) 243#define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1) 244#define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1) 245#define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value) 246#define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value) 247#define _ReadWriteBarrier() asm volatile("" ::: "memory") 248 249#define PRAGMA_WARNING_PUSH_DISABLE(...) 250#define PRAGMA_WARNING_POP() 251 252#define ZeroMemory(dst, size) memset(dst, 0, size) 253#else 254 255#error Unsupported OS/system. 256 257#endif 258 259#define THREAD thread_local 260 261// Universal types 262typedef uint8_t KILOBYTE[1024]; 263typedef KILOBYTE MEGABYTE[1024]; 264typedef MEGABYTE GIGABYTE[1024]; 265 266#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64) 267#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES) 268#define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES) 269 270#include "common/swr_assert.h" 271 272#ifdef __GNUC__ 273#define ATTR_UNUSED __attribute__((unused)) 274#else 275#define ATTR_UNUSED 276#endif 277 278#define SWR_FUNC(_retType, _funcName, /* args */...) \ 279 typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \ 280 _retType SWR_API _funcName(__VA_ARGS__); 281 282// Defined in os.cpp 283void SWR_API SetCurrentThreadName(const char* pThreadName); 284void SWR_API CreateDirectoryPath(const std::string& path); 285 286/// Execute Command (block until finished) 287/// @returns process exit value 288int SWR_API 289 ExecCmd(const std::string& cmd, ///< (In) Command line string 290 const char* pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process 291 std::string* pOptStdOut = nullptr, ///< (Optional Out) Standard Output text 292 std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text 293 const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text 294 295 296/// Helper for setting up FP state 297/// @returns old csr state 298static INLINE uint32_t SetOptimalVectorCSR() 299{ 300 uint32_t oldCSR = _mm_getcsr(); 301 302 uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK)); 303 newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON); 304 _mm_setcsr(newCSR); 305 306 return oldCSR; 307} 308 309/// Set Vector CSR state. 310/// @param csrState - should be value returned from SetOptimalVectorCSR() 311static INLINE void RestoreVectorCSR(uint32_t csrState) 312{ 313 _mm_setcsr(csrState); 314} 315 316#endif //__SWR_OS_H__ 317