1/****************************************************************************
2 * Copyright (C) 2014-2017 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ****************************************************************************/
23
24#ifndef __SWR_OS_H__
25#define __SWR_OS_H__
26
27#include <cstddef>
28#include "core/knobs.h"
29
30#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
31
32#define SWR_API __cdecl
33#define SWR_VISIBLE __declspec(dllexport)
34
35#ifndef NOMINMAX
36#undef UNICODE
37#define NOMINMAX
38#include <windows.h>
39#undef NOMINMAX
40#define UNICODE
41#else
42#undef UNICODE
43#include <windows.h>
44#define UNICODE
45#endif
46#include <intrin.h>
47#include <cstdint>
48
49#if defined(MemoryFence)
50// Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence
51#undef MemoryFence
52#endif
53
54#if defined(_MSC_VER)
55#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
56#elif defined(__GNUC__)
57#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
58#endif
59
60#if defined(_DEBUG)
61// We compile Debug builds with inline function expansion enabled.  This allows
62// functions compiled with __forceinline to be inlined even in Debug builds.
63// The inline_depth(0) pragma below will disable inline function expansion for
64// normal INLINE / inline functions, but not for __forceinline functions.
65// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in
66// Debug builds.
67#define INLINE inline
68#pragma inline_depth(0)
69#else
70// Use of __forceinline increases compile time dramatically in release builds
71// and provides almost 0 measurable benefit.  Disable until we have a compelling
72// use-case
73// #define INLINE __forceinline
74#define INLINE inline
75#endif
76#ifndef FORCEINLINE
77#define FORCEINLINE __forceinline
78#endif
79
80#define DEBUGBREAK __debugbreak()
81
82#define PRAGMA_WARNING_PUSH_DISABLE(...) \
83    __pragma(warning(push));             \
84    __pragma(warning(disable : __VA_ARGS__));
85
86#define PRAGMA_WARNING_POP() __pragma(warning(pop))
87
88static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)
89{
90    return _aligned_malloc(_Size, _Alignment);
91}
92
93static inline void AlignedFree(void* p)
94{
95    return _aligned_free(p);
96}
97
98#if defined(_WIN64)
99#define BitScanReverseSizeT BitScanReverse64
100#define BitScanForwardSizeT BitScanForward64
101#define _mm_popcount_sizeT _mm_popcnt_u64
102#else
103#define BitScanReverseSizeT BitScanReverse
104#define BitScanForwardSizeT BitScanForward
105#define _mm_popcount_sizeT _mm_popcnt_u32
106#endif
107
108#if !defined(_WIN64)
109extern "C" {
/// Replacement for the 64-bit forward bit scan intrinsic on targets where
/// this fallback is compiled (see the enclosing !_WIN64 guard).
/// Searches Mask from the least significant bit upwards.
/// @param Index - receives the position of the lowest set bit; left untouched
///                when Mask is zero
/// @param Mask  - value to scan
/// @returns 0 if Mask is zero, 1 otherwise
inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
{
    if (Mask == 0)
    {
        return 0;
    }
#ifdef __GNUC__
    *Index = __builtin_ctzll(Mask);
#else
    // Portable fallback: walk upwards and stop at the FIRST set bit.  Without
    // the break the loop would keep overwriting *Index and end up reporting
    // the highest set bit instead of the lowest.
    for (unsigned long bit = 0; bit < 64; ++bit)
    {
        if ((1ULL << bit) & Mask)
        {
            *Index = bit;
            break;
        }
    }
#endif
    return 1;
}
124
/// Replacement for the 64-bit reverse bit scan intrinsic on targets where
/// this fallback is compiled (see the enclosing !_WIN64 guard).
/// Searches Mask from the most significant bit downwards.
/// @param Index - receives the position of the highest set bit; left untouched
///                when Mask is zero
/// @param Mask  - value to scan
/// @returns 0 if Mask is zero, 1 otherwise
inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
{
    if (Mask == 0)
    {
        return 0;
    }
#ifdef __GNUC__
    *Index = 63 - __builtin_clzll(Mask);
#else
    // Portable fallback: walk downwards and stop at the FIRST set bit.  Without
    // the break the loop would keep overwriting *Index and end up reporting
    // the lowest set bit instead of the highest.
    for (int bit = 63; bit >= 0; --bit)
    {
        if ((1ULL << bit) & Mask)
        {
            *Index = (unsigned long)bit;
            break;
        }
    }
#endif
    return 1;
}
139}
140#endif
141
142#elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
143
144#define SWR_API
145#define SWR_VISIBLE __attribute__((visibility("default")))
146
147#include <stdlib.h>
148#include <string.h>
149#include <x86intrin.h>
150#include <stdint.h>
151#include <sys/types.h>
152#include <unistd.h>
153#include <sys/stat.h>
154#include <stdio.h>
155#include <limits.h>
156
157typedef void         VOID;
158typedef void*        LPVOID;
159typedef int          INT;
160typedef unsigned int UINT;
161typedef void*        HANDLE;
162typedef int          LONG;
163typedef unsigned int DWORD;
164
165#undef FALSE
166#define FALSE 0
167
168#undef TRUE
169#define TRUE 1
170
171#define MAX_PATH PATH_MAX
172
173#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
174#ifndef INLINE
175#define INLINE __inline
176#endif
177#ifndef FORCEINLINE
178#define FORCEINLINE INLINE
179#endif
180#define DEBUGBREAK asm("int $3")
181
182#if !defined(__CYGWIN__)
183
184#ifndef __cdecl
185#define __cdecl
186#endif
187#ifndef __stdcall
188#define __stdcall
189#endif
190
191#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
192#define __declspec(x) __declspec_##x
193#define __declspec_align(y) __attribute__((aligned(y)))
194#define __declspec_deprecated __attribute__((deprecated))
195#define __declspec_dllexport
196#define __declspec_dllimport
197#define __declspec_noinline __attribute__((__noinline__))
198#define __declspec_nothrow __attribute__((nothrow))
199#define __declspec_novtable
200#define __declspec_thread __thread
201#else
202#define __declspec(X)
203#endif
204
205#endif
206
207#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
208
209#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
/// Fallback for compilers that do not ship a __rdtsc() intrinsic (see the
/// enclosing version guard).  Executes the rdtsc instruction and combines the
/// two 32-bit halves it produces into one 64-bit value.
/// @returns the 64-bit value assembled from EDX:EAX
inline uint64_t __rdtsc()
{
    // Both halves must be unsigned 32-bit values.  With a signed 'long' on a
    // 32-bit target, a low word with bit 31 set would be sign-extended when
    // widened to 64 bits and would corrupt the upper half of the result.
    uint32_t low, high;
    asm volatile("rdtsc" : "=a"(low), "=d"(high));
    return (uint64_t)low | ((uint64_t)high << 32);
}
216#endif
217
218#if !defined(__clang__) && !defined(__INTEL_COMPILER)
219// Intrinsic not defined in gcc < 10
220#if (__GNUC__) && (GCC_VERSION < 100000)
221static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
222{
223    _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));
224    _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));
225}
226#endif
227
228// gcc prior to 4.9 doesn't have _mm*_undefined_*
229#if (__GNUC__) && (GCC_VERSION < 40900)
230#define _mm_undefined_si128 _mm_setzero_si128
231#define _mm256_undefined_ps _mm256_setzero_ps
232#endif
233#endif
234
/// Locate the lowest set bit of a 64-bit mask.
/// @param Index - receives the bit position; left untouched when Mask is zero
/// @param Mask  - value to scan
/// @returns 0 if Mask is zero, 1 otherwise
inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
{
    unsigned char found = 0;
    if (Mask != 0)
    {
        // The builtin is undefined for a zero argument, hence the guard.
        *Index = __builtin_ctzll(Mask);
        found  = 1;
    }
    return found;
}
242
/// Locate the lowest set bit of a 32-bit mask.
/// @param Index - receives the bit position; left untouched when Mask is zero
/// @param Mask  - value to scan
/// @returns 0 if Mask is zero, 1 otherwise
inline unsigned char _BitScanForward(unsigned long* Index, uint32_t Mask)
{
    unsigned char found = 0;
    if (Mask != 0)
    {
        // The builtin is undefined for a zero argument, hence the guard.
        *Index = __builtin_ctz(Mask);
        found  = 1;
    }
    return found;
}
250
/// Locate the highest set bit of a 64-bit mask.
/// @param Index - receives the bit position; left untouched when Mask is zero
/// @param Mask  - value to scan
/// @returns 0 if Mask is zero, 1 otherwise
inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
{
    unsigned char found = 0;
    if (Mask != 0)
    {
        // Count of leading zeros converted to a position counted from bit 0.
        const int leadingZeros = __builtin_clzll(Mask);
        *Index                 = 63 - leadingZeros;
        found                  = 1;
    }
    return found;
}
258
/// Locate the highest set bit of a 32-bit mask.
/// @param Index - receives the bit position; left untouched when Mask is zero
/// @param Mask  - value to scan
/// @returns 0 if Mask is zero, 1 otherwise
inline unsigned char _BitScanReverse(unsigned long* Index, uint32_t Mask)
{
    unsigned char found = 0;
    if (Mask != 0)
    {
        // Count of leading zeros converted to a position counted from bit 0.
        const int leadingZeros = __builtin_clz(Mask);
        *Index                 = 31 - leadingZeros;
        found                  = 1;
    }
    return found;
}
266
/// Allocate a block of memory with the requested alignment using
/// posix_memalign().
/// @param size      - number of bytes requested
/// @param alignment - required alignment in bytes; must be a power of two
/// @returns pointer to the block, or NULL if posix_memalign() reports an error.
///          Release the block with AlignedFree().
inline void* AlignedMalloc(size_t size, size_t alignment)
{
    // posix_memalign() only accepts alignments that are a multiple of
    // sizeof(void*).  Promote smaller power-of-two requests (1, 2, 4 on 64-bit
    // targets) to sizeof(void*): the stricter alignment still satisfies the
    // caller, and such requests no longer fail here.  Zero and non-power-of-two
    // values are passed through unchanged so they are still rejected.
    const int isPowerOfTwo = (alignment != 0) && ((alignment & (alignment - 1)) == 0);
    if (isPowerOfTwo && alignment < sizeof(void*))
    {
        alignment = sizeof(void*);
    }

    void* ret = NULL;
    if (posix_memalign(&ret, alignment, size) != 0)
    {
        return NULL;
    }
    return ret;
}
276
/// Release a block previously obtained from AlignedMalloc().
/// Blocks produced by posix_memalign() are released with plain free().
/// @param p - block to release
static inline void AlignedFree(void* p)
{
    free(p);
}
281
282#define _countof(a) (sizeof(a) / sizeof(*(a)))
283
284#define sprintf_s sprintf
285#define strcpy_s(dst, size, src) strncpy(dst, src, size)
286#define GetCurrentProcessId getpid
287
288#define InterlockedCompareExchange(Dest, Exchange, Comparand) \
289    __sync_val_compare_and_swap(Dest, Comparand, Exchange)
290#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
291#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
292#define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)
293#define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1)
294#define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value)
295#define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value)
296#define _ReadWriteBarrier() asm volatile("" ::: "memory")
297
298#define PRAGMA_WARNING_PUSH_DISABLE(...)
299#define PRAGMA_WARNING_POP()
300
301#define ZeroMemory(dst, size) memset(dst, 0, size)
302#else
303
304#error Unsupported OS/system.
305
306#endif
307
308#define THREAD thread_local
309
310// Universal types
311typedef uint8_t  KILOBYTE[1024];
312typedef KILOBYTE MEGABYTE[1024];
313typedef MEGABYTE GIGABYTE[1024];
314
315#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
316#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
317#define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)
318
319#include "common/swr_assert.h"
320
321#ifdef __GNUC__
322#define ATTR_UNUSED __attribute__((unused))
323#else
324#define ATTR_UNUSED
325#endif
326
327#define SWR_FUNC(_retType, _funcName, /* args */...)        \
328    typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
329    _retType SWR_API _funcName(__VA_ARGS__);
330
331// Defined in os.cpp
332void SWR_API SetCurrentThreadName(const char* pThreadName);
333void SWR_API CreateDirectoryPath(const std::string& path);
334
335/// Execute Command (block until finished)
336/// @returns process exit value
337int SWR_API
338    ExecCmd(const std::string& cmd,                ///< (In) Command line string
339            const char*  pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
340            std::string* pOptStdOut     = nullptr,   ///< (Optional Out) Standard Output text
341            std::string* pOptStdErr     = nullptr,   ///< (Optional Out) Standard Error text
342            const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
343
344
345/// Helper for setting up FP state
346/// @returns old csr state
347static INLINE uint32_t SetOptimalVectorCSR()
348{
349    uint32_t oldCSR = _mm_getcsr();
350
351    uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK));
352    newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
353    _mm_setcsr(newCSR);
354
355    return oldCSR;
356}
357
358/// Set Vector CSR state.
359/// @param csrState - should be value returned from SetOptimalVectorCSR()
360static INLINE void RestoreVectorCSR(uint32_t csrState)
361{
362    _mm_setcsr(csrState);
363}
364
365#endif //__SWR_OS_H__
366