1/**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ****************************************************************************/ 23 24#ifndef __SWR_INTRIN_H__ 25#define __SWR_INTRIN_H__ 26 27#include "os.h" 28 29#if !defined(SIMD_ARCH) 30#define SIMD_ARCH KNOB_ARCH 31#endif 32 33#include "simdlib_types.hpp" 34 35typedef SIMDImpl::SIMD128Impl::Float simd4scalar; 36typedef SIMDImpl::SIMD128Impl::Double simd4scalard; 37typedef SIMDImpl::SIMD128Impl::Integer simd4scalari; 38typedef SIMDImpl::SIMD128Impl::Vec4 simd4vector; 39typedef SIMDImpl::SIMD128Impl::Mask simd4mask; 40 41typedef SIMDImpl::SIMD256Impl::Float simd8scalar; 42typedef SIMDImpl::SIMD256Impl::Double simd8scalard; 43typedef SIMDImpl::SIMD256Impl::Integer simd8scalari; 44typedef SIMDImpl::SIMD256Impl::Vec4 simd8vector; 45typedef SIMDImpl::SIMD256Impl::Mask simd8mask; 46 47typedef SIMDImpl::SIMD512Impl::Float simd16scalar; 48typedef SIMDImpl::SIMD512Impl::Double simd16scalard; 49typedef SIMDImpl::SIMD512Impl::Integer simd16scalari; 50typedef SIMDImpl::SIMD512Impl::Vec4 simd16vector; 51typedef SIMDImpl::SIMD512Impl::Mask simd16mask; 52 53#if KNOB_SIMD_WIDTH == 8 54typedef simd8scalar simdscalar; 55typedef simd8scalard simdscalard; 56typedef simd8scalari simdscalari; 57typedef simd8vector simdvector; 58typedef simd8mask simdmask; 59#else 60#error Unsupported vector width 61#endif 62 63INLINE 64UINT pdep_u32(UINT a, UINT mask) 65{ 66#if KNOB_ARCH >= KNOB_ARCH_AVX2 67 return _pdep_u32(a, mask); 68#else 69 UINT result = 0; 70 71 // copied from http://wm.ite.pl/articles/pdep-soft-emu.html 72 // using bsf instead of funky loop 73 unsigned long maskIndex = 0; 74 while (_BitScanForward(&maskIndex, mask)) 75 { 76 // 1. isolate lowest set bit of mask 77 const UINT lowest = 1 << maskIndex; 78 79 // 2. populate LSB from src 80 const UINT LSB = (UINT)((int)(a << 31) >> 31); 81 82 // 3. copy bit from mask 83 result |= LSB & lowest; 84 85 // 4. clear lowest bit 86 mask &= ~lowest; 87 88 // 5. prepare for next iteration 89 a >>= 1; 90 } 91 92 return result; 93#endif 94} 95 96INLINE 97UINT pext_u32(UINT a, UINT mask) 98{ 99#if KNOB_ARCH >= KNOB_ARCH_AVX2 100 return _pext_u32(a, mask); 101#else 102 UINT result = 0; 103 unsigned long maskIndex; 104 uint32_t currentBit = 0; 105 while (_BitScanForward(&maskIndex, mask)) 106 { 107 // 1. isolate lowest set bit of mask 108 const UINT lowest = 1 << maskIndex; 109 110 // 2. copy bit from mask 111 result |= ((a & lowest) > 0) << currentBit++; 112 113 // 3. clear lowest bit 114 mask &= ~lowest; 115 } 116 return result; 117#endif 118} 119 120#endif //__SWR_INTRIN_H__ 121