1/****************************************************************************
2 * Copyright (C) 2014-2017 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ****************************************************************************/
23
24#ifndef __SWR_OS_H__
25#define __SWR_OS_H__
26
27#include <cstddef>
28#include "core/knobs.h"
29
30#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
31
32#define SWR_API __cdecl
33#define SWR_VISIBLE __declspec(dllexport)
34
35#ifndef NOMINMAX
36#define NOMINMAX
37#include <windows.h>
38#undef NOMINMAX
39#else
40#include <windows.h>
41#endif
42#include <intrin.h>
43#include <cstdint>
44
45#if defined(MemoryFence)
46// Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence
47#undef MemoryFence
48#endif
49
50#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
51
52#if defined(_DEBUG)
53// We compile Debug builds with inline function expansion enabled.  This allows
54// functions compiled with __forceinline to be inlined even in Debug builds.
55// The inline_depth(0) pragma below will disable inline function expansion for
56// normal INLINE / inline functions, but not for __forceinline functions.
57// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in
58// Debug builds.
59#define INLINE inline
60#pragma inline_depth(0)
61#else
62// Use of __forceinline increases compile time dramatically in release builds
63// and provides almost 0 measurable benefit.  Disable until we have a compelling
64// use-case
65// #define INLINE __forceinline
66#define INLINE inline
67#endif
68#ifndef FORCEINLINE
69#define FORCEINLINE __forceinline
70#endif
71
72#define DEBUGBREAK __debugbreak()
73
74#define PRAGMA_WARNING_PUSH_DISABLE(...) \
75    __pragma(warning(push));             \
76    __pragma(warning(disable : __VA_ARGS__));
77
78#define PRAGMA_WARNING_POP() __pragma(warning(pop))
79
80static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)
81{
82    return _aligned_malloc(_Size, _Alignment);
83}
84
85static inline void AlignedFree(void* p)
86{
87    return _aligned_free(p);
88}
89
90#if defined(_WIN64)
91#define BitScanReverseSizeT BitScanReverse64
92#define BitScanForwardSizeT BitScanForward64
93#define _mm_popcount_sizeT _mm_popcnt_u64
94#else
95#define BitScanReverseSizeT BitScanReverse
96#define BitScanForwardSizeT BitScanForward
97#define _mm_popcount_sizeT _mm_popcnt_u32
98#endif
99
100#elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
101
102#define SWR_API
103#define SWR_VISIBLE __attribute__((visibility("default")))
104
105#include <stdlib.h>
106#include <string.h>
107#include <x86intrin.h>
108#include <stdint.h>
109#include <sys/types.h>
110#include <unistd.h>
111#include <sys/stat.h>
112#include <stdio.h>
113#include <limits.h>
114
115typedef void         VOID;
116typedef void*        LPVOID;
117typedef int          INT;
118typedef unsigned int UINT;
119typedef void*        HANDLE;
120typedef int          LONG;
121typedef unsigned int DWORD;
122
123#undef FALSE
124#define FALSE 0
125
126#undef TRUE
127#define TRUE 1
128
129#define MAX_PATH PATH_MAX
130
131#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
132#ifndef INLINE
133#define INLINE __inline
134#endif
135#ifndef FORCEINLINE
136#define FORCEINLINE INLINE
137#endif
138#define DEBUGBREAK asm("int $3")
139
140#if !defined(__CYGWIN__)
141
142#ifndef __cdecl
143#define __cdecl
144#endif
145#ifndef __stdcall
146#define __stdcall
147#endif
148
149#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
150#define __declspec(x) __declspec_##x
151#define __declspec_align(y) __attribute__((aligned(y)))
152#define __declspec_deprecated __attribute__((deprecated))
153#define __declspec_dllexport
154#define __declspec_dllimport
155#define __declspec_noinline __attribute__((__noinline__))
156#define __declspec_nothrow __attribute__((nothrow))
157#define __declspec_novtable
158#define __declspec_thread __thread
159#else
160#define __declspec(X)
161#endif
162
163#endif
164
165#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
166
167#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
/// Fallback __rdtsc() for compilers that do not provide the intrinsic.
/// @returns the 64-bit value produced by the RDTSC instruction
inline uint64_t __rdtsc()
{
    // RDTSC delivers its result split across EDX:EAX.  Keep each half in an
    // unsigned 32-bit variable: a signed 'long' low half would be
    // sign-extended on 32-bit targets when widened to 64 bits, corrupting the
    // upper half whenever bit 31 of the low half is set.
    uint32_t lowHalf, highHalf;
    asm volatile("rdtsc" : "=a"(lowHalf), "=d"(highHalf));
    return ((uint64_t)highHalf << 32) | (uint64_t)lowHalf;
}
174#endif
175
176#if !defined(__clang__) && !defined(__INTEL_COMPILER)
177// Intrinsic not defined in gcc
178static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
179{
180    _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));
181    _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));
182}
183
// gcc prior to 4.9 doesn't have _mm*_undefined_*, so fall back to the zeroing
// variants there.  GCC_VERSION is encoded as major * 10000 + minor * 100 +
// patch, which makes 4.9.0 equal to 40900.
#if (__GNUC__) && (GCC_VERSION < 40900)
#define _mm_undefined_si128 _mm_setzero_si128
#define _mm256_undefined_ps _mm256_setzero_ps
#endif
189#endif
190
/// Find the lowest set bit of an 'unsigned long' mask.
/// @param Index - receives the zero-based position of the lowest set bit;
///                written as 0 when Mask is zero
/// @param Mask  - value to scan
/// @returns non-zero if a set bit was found, 0 if Mask is zero
inline unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask)
{
    // The count-trailing-zeros builtins have an undefined result for zero.
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    // Use the 'long' flavour of the builtin; __builtin_ctz() takes an
    // 'unsigned int' and would silently drop the upper bits of a wider mask.
    *Index = (unsigned long)__builtin_ctzl(Mask);
    return 1;
}
196
/// Find the lowest set bit of an 'unsigned int' mask.
/// @param Index - receives the zero-based position of the lowest set bit;
///                written as 0 when Mask is zero
/// @param Mask  - value to scan
/// @returns non-zero if a set bit was found, 0 if Mask is zero
inline unsigned char _BitScanForward(unsigned int* Index, unsigned int Mask)
{
    // __builtin_ctz() has an undefined result for zero, so never feed it one.
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    *Index = (unsigned int)__builtin_ctz(Mask);
    return 1;
}
202
/// Find the highest set bit of an 'unsigned long' mask.
/// @param Index - receives the zero-based position of the highest set bit;
///                written as 0 when Mask is zero
/// @param Mask  - value to scan
/// @returns non-zero if a set bit was found, 0 if Mask is zero
inline unsigned char _BitScanReverse(unsigned long* Index, unsigned long Mask)
{
    // The count-leading-zeros builtins have an undefined result for zero.
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    // The leading-zero count must be taken at the width of 'unsigned long'
    // and subtracted from that same width's top bit index.  Mixing the 32-bit
    // __builtin_clz() with a hard-coded 63 reports bit 32 for Mask == 1.
    const unsigned long topBit = (unsigned long)(sizeof(Mask) * CHAR_BIT) - 1UL;
    *Index                     = topBit - (unsigned long)__builtin_clzl(Mask);
    return 1;
}
208
/// Find the highest set bit of an 'unsigned int' mask.
/// @param Index - receives the zero-based position of the highest set bit;
///                written as 0 when Mask is zero
/// @param Mask  - value to scan
/// @returns non-zero if a set bit was found, 0 if Mask is zero
inline unsigned char _BitScanReverse(unsigned int* Index, unsigned int Mask)
{
    // __builtin_clz() has an undefined result for zero, so never feed it one.
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    const unsigned int topBit = (unsigned int)(sizeof(Mask) * CHAR_BIT) - 1u;
    *Index                    = topBit - (unsigned int)__builtin_clz(Mask);
    return 1;
}
214
215#define _BitScanForward64 _BitScanForward
216#define _BitScanReverse64 _BitScanReverse
217
/// Allocate a block of memory on the requested alignment boundary.
/// @param size      - number of bytes requested
/// @param alignment - required alignment in bytes; must be a power of two
/// @returns pointer to the block (release with AlignedFree()), or NULL when
///          posix_memalign() reports failure
inline void* AlignedMalloc(size_t size, size_t alignment)
{
    // posix_memalign() only accepts alignments that are a multiple of
    // sizeof(void*).  Raise smaller power-of-two requests to that minimum;
    // the stronger alignment still satisfies the caller, and such requests
    // no longer fail here while succeeding with _aligned_malloc().
    const bool isPowerOfTwo = (alignment != 0) && ((alignment & (alignment - 1)) == 0);
    if (isPowerOfTwo && alignment < sizeof(void*))
    {
        alignment = sizeof(void*);
    }

    void* ret = NULL;
    if (posix_memalign(&ret, alignment, size) != 0)
    {
        return NULL;
    }
    return ret;
}
227
/// Release a block previously handed out by AlignedMalloc().
/// @param p - pointer forwarded unchanged to free()
static inline void AlignedFree(void* p)
{
    // The allocation side of this branch uses posix_memalign(), whose
    // memory is released with plain free().
    free(p);
}
232
/// Element count of a true array (must not be used on a pointer).
#define _countof(a) (sizeof(a) / sizeof(*(a)))

// NOTE(review): this maps straight to sprintf(), so no bounds checking is
// performed on this platform.
#define sprintf_s sprintf

/// Bounded string copy backing the strcpy_s shim below.
/// Copies at most size - 1 characters and always terminates the destination;
/// a bare strncpy() leaves it unterminated when src does not fit.
/// @param dst  - destination buffer
/// @param size - capacity of dst in bytes; nothing is written when 0
/// @param src  - NUL-terminated source string
/// @returns dst, the same value strncpy() would return
static inline char* SwrBoundedStrCopy(char* dst, size_t size, const char* src)
{
    if (size == 0)
    {
        return dst;
    }
    strncpy(dst, src, size - 1);
    dst[size - 1] = '\0';
    return dst;
}
#define strcpy_s(dst, size, src) SwrBoundedStrCopy(dst, size, src)
#define GetCurrentProcessId getpid

// Win32 Interlocked* shims built on the gcc __sync builtins.
// CompareExchange and ExchangeAdd yield the value held before the operation;
// Increment, Decrement and Add yield the value after it.
#define InterlockedCompareExchange(Dest, Exchange, Comparand) \
    __sync_val_compare_and_swap(Dest, Comparand, Exchange)
#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
#define InterlockedDecrement(Addend) __sync_sub_and_fetch(Addend, 1)
#define InterlockedDecrement64(Addend) __sync_sub_and_fetch(Addend, 1)
#define InterlockedIncrement(Addend) __sync_add_and_fetch(Addend, 1)
#define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value)
#define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value)
// Compiler-level barrier only: the empty asm emits no instruction.
#define _ReadWriteBarrier() asm volatile("" ::: "memory")

// MSVC warning control has no effect on this platform.
#define PRAGMA_WARNING_PUSH_DISABLE(...)
#define PRAGMA_WARNING_POP()

#define ZeroMemory(dst, size) memset(dst, 0, size)
253#else
254
255#error Unsupported OS/system.
256
257#endif
258
259#define THREAD thread_local
260
261// Universal types
262typedef uint8_t  KILOBYTE[1024];
263typedef KILOBYTE MEGABYTE[1024];
264typedef MEGABYTE GIGABYTE[1024];
265
266#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
267#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
268#define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)
269
270#include "common/swr_assert.h"
271
272#ifdef __GNUC__
273#define ATTR_UNUSED __attribute__((unused))
274#else
275#define ATTR_UNUSED
276#endif
277
278#define SWR_FUNC(_retType, _funcName, /* args */...)        \
279    typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
280    _retType SWR_API _funcName(__VA_ARGS__);
281
282// Defined in os.cpp
283void SWR_API SetCurrentThreadName(const char* pThreadName);
284void SWR_API CreateDirectoryPath(const std::string& path);
285
286/// Execute Command (block until finished)
287/// @returns process exit value
288int SWR_API
289    ExecCmd(const std::string& cmd,                ///< (In) Command line string
290            const char*  pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
291            std::string* pOptStdOut     = nullptr,   ///< (Optional Out) Standard Output text
292            std::string* pOptStdErr     = nullptr,   ///< (Optional Out) Standard Error text
293            const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
294
295
296/// Helper for setting up FP state
297/// @returns old csr state
298static INLINE uint32_t SetOptimalVectorCSR()
299{
300    uint32_t oldCSR = _mm_getcsr();
301
302    uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK));
303    newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
304    _mm_setcsr(newCSR);
305
306    return oldCSR;
307}
308
309/// Set Vector CSR state.
310/// @param csrState - should be value returned from SetOptimalVectorCSR()
311static INLINE void RestoreVectorCSR(uint32_t csrState)
312{
313    _mm_setcsr(csrState);
314}
315
316#endif //__SWR_OS_H__
317