1/**************************************************************************** 2 * Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ****************************************************************************/ 23 24#ifndef __SWR_OS_H__ 25#define __SWR_OS_H__ 26 27#include <cstddef> 28#include "core/knobs.h" 29 30#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX) 31 32#define SWR_API __cdecl 33#define SWR_VISIBLE __declspec(dllexport) 34 35#ifndef NOMINMAX 36#undef UNICODE 37#define NOMINMAX 38#include <windows.h> 39#undef NOMINMAX 40#define UNICODE 41#else 42#undef UNICODE 43#include <windows.h> 44#define UNICODE 45#endif 46#include <intrin.h> 47#include <cstdint> 48 49#if defined(MemoryFence) 50// Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence 51#undef MemoryFence 52#endif 53 54#if defined(_MSC_VER) 55#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD 56#elif defined(__GNUC__) 57#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH))) 58#endif 59 60#if defined(_DEBUG) 61// We compile Debug builds with inline function expansion enabled. This allows 62// functions compiled with __forceinline to be inlined even in Debug builds. 63// The inline_depth(0) pragma below will disable inline function expansion for 64// normal INLINE / inline functions, but not for __forceinline functions. 65// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in 66// Debug builds. 67#define INLINE inline 68#pragma inline_depth(0) 69#else 70// Use of __forceinline increases compile time dramatically in release builds 71// and provides almost 0 measurable benefit. Disable until we have a compelling 72// use-case 73// #define INLINE __forceinline 74#define INLINE inline 75#endif 76#ifndef FORCEINLINE 77#define FORCEINLINE __forceinline 78#endif 79 80#define DEBUGBREAK __debugbreak() 81 82#define PRAGMA_WARNING_PUSH_DISABLE(...) \ 83 __pragma(warning(push)); \ 84 __pragma(warning(disable : __VA_ARGS__)); 85 86#define PRAGMA_WARNING_POP() __pragma(warning(pop)) 87 88static inline void* AlignedMalloc(size_t _Size, size_t _Alignment) 89{ 90 return _aligned_malloc(_Size, _Alignment); 91} 92 93static inline void AlignedFree(void* p) 94{ 95 return _aligned_free(p); 96} 97 98#if defined(_WIN64) 99#define BitScanReverseSizeT BitScanReverse64 100#define BitScanForwardSizeT BitScanForward64 101#define _mm_popcount_sizeT _mm_popcnt_u64 102#else 103#define BitScanReverseSizeT BitScanReverse 104#define BitScanForwardSizeT BitScanForward 105#define _mm_popcount_sizeT _mm_popcnt_u32 106#endif 107 108#if !defined(_WIN64) 109extern "C" { 110inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask) 111{ 112 if (Mask == 0) 113 return 0; 114#ifdef __GNUC__ 115 *Index = __builtin_ctzll(Mask); 116#else 117 *Index = 0; 118 for (int i = 0; i < 64; ++ i) 119 if ((1ULL << i) & Mask) 120 *Index = i; 121#endif 122 return 1; 123} 124 125inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask) 126{ 127 if (Mask == 0) 128 return 0; 129#ifdef __GNUC__ 130 *Index = 63 - __builtin_clzll(Mask); 131#else 132 *Index = 0; 133 for (int i = 63; i >= 0; -- i) 134 if ((1ULL << i) & Mask) 135 *Index = i; 136#endif 137 return 1; 138} 139} 140#endif 141 142#elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__) 143 144#define SWR_API 145#define SWR_VISIBLE __attribute__((visibility("default"))) 146 147#include <stdlib.h> 148#include <string.h> 149#include <x86intrin.h> 150#include <stdint.h> 151#include <sys/types.h> 152#include <unistd.h> 153#include <sys/stat.h> 154#include <stdio.h> 155#include <limits.h> 156 157typedef void VOID; 158typedef void* LPVOID; 159typedef int INT; 160typedef unsigned int UINT; 161typedef void* HANDLE; 162typedef int LONG; 163typedef unsigned int DWORD; 164 165#undef FALSE 166#define FALSE 0 167 168#undef TRUE 169#define TRUE 1 170 171#define MAX_PATH PATH_MAX 172 173#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH))) 174#ifndef INLINE 175#define INLINE __inline 176#endif 177#ifndef FORCEINLINE 178#define FORCEINLINE INLINE 179#endif 180#define DEBUGBREAK asm("int $3") 181 182#if !defined(__CYGWIN__) 183 184#ifndef __cdecl 185#define __cdecl 186#endif 187#ifndef __stdcall 188#define __stdcall 189#endif 190 191#if defined(__GNUC__) && !defined(__INTEL_COMPILER) 192#define __declspec(x) __declspec_##x 193#define __declspec_align(y) __attribute__((aligned(y))) 194#define __declspec_deprecated __attribute__((deprecated)) 195#define __declspec_dllexport 196#define __declspec_dllimport 197#define __declspec_noinline __attribute__((__noinline__)) 198#define __declspec_nothrow __attribute__((nothrow)) 199#define __declspec_novtable 200#define __declspec_thread __thread 201#else 202#define __declspec(X) 203#endif 204 205#endif 206 207#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) 208 209#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500) 210inline uint64_t __rdtsc() 211{ 212 long low, high; 213 asm volatile("rdtsc" : "=a"(low), "=d"(high)); 214 return (low | ((uint64_t)high << 32)); 215} 216#endif 217 218#if !defined(__clang__) && !defined(__INTEL_COMPILER) 219// Intrinsic not defined in gcc < 10 220#if (__GNUC__) && (GCC_VERSION < 100000) 221static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a) 222{ 223 _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a)); 224 _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1)); 225} 226#endif 227 228// gcc prior to 4.9 doesn't have _mm*_undefined_* 229#if (__GNUC__) && (GCC_VERSION < 40900) 230#define _mm_undefined_si128 _mm_setzero_si128 231#define _mm256_undefined_ps _mm256_setzero_ps 232#endif 233#endif 234 235inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask) 236{ 237 if (Mask == 0) 238 return 0; 239 *Index = __builtin_ctzll(Mask); 240 return 1; 241} 242 243inline unsigned char _BitScanForward(unsigned long* Index, uint32_t Mask) 244{ 245 if (Mask == 0) 246 return 0; 247 *Index = __builtin_ctz(Mask); 248 return 1; 249} 250 251inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask) 252{ 253 if (Mask == 0) 254 return 0; 255 *Index = 63 - __builtin_clzll(Mask); 256 return 1; 257} 258 259inline unsigned char _BitScanReverse(unsigned long* Index, uint32_t Mask) 260{ 261 if (Mask == 0) 262 return 0; 263 *Index = 31 - __builtin_clz(Mask); 264 return 1; 265} 266 267inline void* AlignedMalloc(size_t size, size_t alignment) 268{ 269 void* ret; 270 if (posix_memalign(&ret, alignment, size)) 271 { 272 return NULL; 273 } 274 return ret; 275} 276 277static inline void AlignedFree(void* p) 278{ 279 free(p); 280} 281 282#define _countof(a) (sizeof(a) / sizeof(*(a))) 283 284#define sprintf_s sprintf 285#define strcpy_s(dst, size, src) strncpy(dst, src, size) 286#define GetCurrentProcessId getpid 287 288#define InterlockedCompareExchange(Dest, Exchange, Comparand) \ 289 __sync_val_compare_and_swap(Dest, Comparand, Exchange) 290#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value) 291#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1) 292#define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1) 293#define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1) 294#define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value) 295#define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value) 296#define _ReadWriteBarrier() asm volatile("" ::: "memory") 297 298#define PRAGMA_WARNING_PUSH_DISABLE(...) 299#define PRAGMA_WARNING_POP() 300 301#define ZeroMemory(dst, size) memset(dst, 0, size) 302#else 303 304#error Unsupported OS/system. 305 306#endif 307 308#define THREAD thread_local 309 310// Universal types 311typedef uint8_t KILOBYTE[1024]; 312typedef KILOBYTE MEGABYTE[1024]; 313typedef MEGABYTE GIGABYTE[1024]; 314 315#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64) 316#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES) 317#define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES) 318 319#include "common/swr_assert.h" 320 321#ifdef __GNUC__ 322#define ATTR_UNUSED __attribute__((unused)) 323#else 324#define ATTR_UNUSED 325#endif 326 327#define SWR_FUNC(_retType, _funcName, /* args */...) \ 328 typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \ 329 _retType SWR_API _funcName(__VA_ARGS__); 330 331// Defined in os.cpp 332void SWR_API SetCurrentThreadName(const char* pThreadName); 333void SWR_API CreateDirectoryPath(const std::string& path); 334 335/// Execute Command (block until finished) 336/// @returns process exit value 337int SWR_API 338 ExecCmd(const std::string& cmd, ///< (In) Command line string 339 const char* pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process 340 std::string* pOptStdOut = nullptr, ///< (Optional Out) Standard Output text 341 std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text 342 const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text 343 344 345/// Helper for setting up FP state 346/// @returns old csr state 347static INLINE uint32_t SetOptimalVectorCSR() 348{ 349 uint32_t oldCSR = _mm_getcsr(); 350 351 uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK)); 352 newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON); 353 _mm_setcsr(newCSR); 354 355 return oldCSR; 356} 357 358/// Set Vector CSR state. 359/// @param csrState - should be value returned from SetOptimalVectorCSR() 360static INLINE void RestoreVectorCSR(uint32_t csrState) 361{ 362 _mm_setcsr(csrState); 363} 364 365#endif //__SWR_OS_H__ 366