/* Copyright (C) 2018-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#endif

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#include <altivec.h>
#include <tmmintrin.h>
/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_ZERO        0x01
#define _MM_FROUND_TO_POS_INF     0x02
#define _MM_FROUND_TO_NEG_INF     0x03
#define _MM_FROUND_CUR_DIRECTION  0x04

#define _MM_FROUND_NINT \
  (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR \
  (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL \
  (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC \
  (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT \
  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT \
  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC    0x08
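
/* A brief sketch of how the composite macros above combine (editorial
   commentary, not part of the Intel API): each one simply ORs a
   rounding direction with an exception-control bit, mirroring the C99
   rint versus nearbyint pair.  Since _MM_FROUND_RAISE_EXC is zero,
   _MM_FROUND_FLOOR expands to the same value as _MM_FROUND_TO_NEG_INF
   (0x03), while _MM_FROUND_NEARBYINT (0x0c) additionally sets the
   _MM_FROUND_NO_EXC bit to request that exceptions be suppressed.  */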
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_pd (__m128d __A, int __rounding)
{
  __v2df __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Save enabled exceptions, disable all exceptions,
	 and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
      __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr));
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
      __fpscr_save.__fr = __builtin_mffs ();
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
      __fpscr_save.__fpscr &= ~0xf8;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
#endif
      /* Insert an artificial "read/write" reference to the variable
	 read below, to ensure the compiler does not schedule
	 a read/use of the variable before the FPSCR is modified, above.
	 This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : "+wa" (__A));
    }

  switch (__rounding)
    {
      case _MM_FROUND_TO_NEAREST_INT:
	__fpscr_save.__fr = __builtin_mffsl ();
	__attribute__ ((fallthrough));
      case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
	__builtin_set_fpscr_rn (0b00);
	/* Insert an artificial "read/write" reference to the variable
	   read below, to ensure the compiler does not schedule
	   a read/use of the variable before the FPSCR is modified, above.
	   This can be removed if and when GCC PR102783 is fixed.  */
	__asm__ ("" : "+wa" (__A));

	__r = vec_rint ((__v2df) __A);

	/* Insert an artificial "read" reference to the variable written
	   above, to ensure the compiler does not schedule the computation
	   of the value after the manipulation of the FPSCR, below.
	   This can be removed if and when GCC PR102783 is fixed.  */
	__asm__ ("" : : "wa" (__r));
	__builtin_set_fpscr_rn (__fpscr_save.__fpscr);
	break;
      case _MM_FROUND_TO_NEG_INF:
      case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
	__r = vec_floor ((__v2df) __A);
	break;
      case _MM_FROUND_TO_POS_INF:
      case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
	__r = vec_ceil ((__v2df) __A);
	break;
      case _MM_FROUND_TO_ZERO:
      case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
	__r = vec_trunc ((__v2df) __A);
	break;
      case _MM_FROUND_CUR_DIRECTION:
      case _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC:
	__r = vec_rint ((__v2df) __A);
	break;
    }
  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Insert an artificial "read" reference to the variable written
	 above, to ensure the compiler does not schedule the computation
	 of the value after the manipulation of the FPSCR, below.
	 This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : : "wa" (__r));
      /* Restore enabled exceptions.  */
      __fpscr_save.__fr = __builtin_mffsl ();
      __fpscr_save.__fpscr |= __enables_save.__fpscr;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
    }
  return (__m128d) __r;
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_sd (__m128d __A, __m128d __B, int __rounding)
{
  __B = _mm_round_pd (__B, __rounding);
  __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] };
  return (__m128d) __r;
}
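
/* A brief usage sketch for the two functions above (values are
   illustrative; _mm_set_pd is assumed visible through the SSE2
   headers that tmmintrin.h pulls in):

     __m128d __v = _mm_set_pd (2.5, -1.5);               -> { -1.5, 2.5 }
     __m128d __n = _mm_round_pd (__v, _MM_FROUND_NINT);  -> { -2.0, 2.0 }
     __m128d __t = _mm_round_pd (__v, _MM_FROUND_TRUNC); -> { -1.0, 2.0 }

   Round-to-nearest ties to even, hence 2.5 -> 2.0.  _mm_round_sd
   rounds only element 0 of __B and carries element 1 of __A through
   unchanged.  */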
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ps (__m128 __A, int __rounding)
{
  __v4sf __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Save enabled exceptions, disable all exceptions,
	 and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
      __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr));
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
      __fpscr_save.__fr = __builtin_mffs ();
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
      __fpscr_save.__fpscr &= ~0xf8;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
#endif
      /* Insert an artificial "read/write" reference to the variable
	 read below, to ensure the compiler does not schedule
	 a read/use of the variable before the FPSCR is modified, above.
	 This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : "+wa" (__A));
    }

  switch (__rounding)
    {
      case _MM_FROUND_TO_NEAREST_INT:
	__fpscr_save.__fr = __builtin_mffsl ();
	__attribute__ ((fallthrough));
      case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
	__builtin_set_fpscr_rn (0b00);
	/* Insert an artificial "read/write" reference to the variable
	   read below, to ensure the compiler does not schedule
	   a read/use of the variable before the FPSCR is modified, above.
	   This can be removed if and when GCC PR102783 is fixed.  */
	__asm__ ("" : "+wa" (__A));

	__r = vec_rint ((__v4sf) __A);

	/* Insert an artificial "read" reference to the variable written
	   above, to ensure the compiler does not schedule the computation
	   of the value after the manipulation of the FPSCR, below.
	   This can be removed if and when GCC PR102783 is fixed.  */
	__asm__ ("" : : "wa" (__r));
	__builtin_set_fpscr_rn (__fpscr_save.__fpscr);
	break;
      case _MM_FROUND_TO_NEG_INF:
      case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
	__r = vec_floor ((__v4sf) __A);
	break;
      case _MM_FROUND_TO_POS_INF:
      case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
	__r = vec_ceil ((__v4sf) __A);
	break;
      case _MM_FROUND_TO_ZERO:
      case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
	__r = vec_trunc ((__v4sf) __A);
	break;
      case _MM_FROUND_CUR_DIRECTION:
      case _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC:
	__r = vec_rint ((__v4sf) __A);
	break;
    }
  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Insert an artificial "read" reference to the variable written
	 above, to ensure the compiler does not schedule the computation
	 of the value after the manipulation of the FPSCR, below.
	 This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : : "wa" (__r));
      /* Restore enabled exceptions.  */
      __fpscr_save.__fr = __builtin_mffsl ();
      __fpscr_save.__fpscr |= __enables_save.__fpscr;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
    }
  return (__m128) __r;
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ss (__m128 __A, __m128 __B, int __rounding)
{
  __B = _mm_round_ps (__B, __rounding);
  __v4sf __r = (__v4sf) __A;
  __r[0] = ((__v4sf) __B)[0];
  return (__m128) __r;
}

#define _mm_ceil_pd(V)	   _mm_round_pd ((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V)	   _mm_round_pd ((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V)	   _mm_round_ps ((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V)	   _mm_round_ps ((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
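
/* The ceil/floor macros above are thin wrappers; for example,
   _mm_floor_ps (__v) is exactly _mm_round_ps ((__v), _MM_FROUND_FLOOR)
   and behaves like applying floorf to each lane, so a hypothetical
   input of { 1.5f, -1.5f, 2.0f, -0.25f } floors to
   { 1.0f, -2.0f, 2.0f, -1.0f }.  */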
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
{
  __v16qi __result = (__v16qi) __A;

  __result[__N & 0xf] = __D;

  return (__m128i) __result;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32 (__m128i const __A, int const __D, int const __N)
{
  __v4si __result = (__v4si) __A;

  __result[__N & 3] = __D;

  return (__m128i) __result;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
{
  __v2di __result = (__v2di) __A;

  __result[__N & 1] = __D;

  return (__m128i) __result;
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
  return (unsigned char) ((__v16qi) __X)[__N & 15];
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32 (__m128i __X, const int __N)
{
  return ((__v4si) __X)[__N & 3];
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi64 (__m128i __X, const int __N)
{
  return ((__v2di) __X)[__N & 1];
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_ps (__m128 __X, const int __N)
{
  /* Return the raw bit pattern of the selected float lane as an int.  */
  return ((__v4si) __X)[__N & 3];
}
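
/* A small sketch of the insert/extract pairs above (illustrative;
   _mm_set_epi32 is assumed visible through the SSE2 headers included
   by tmmintrin.h).  The selector is masked, so an out-of-range index
   wraps rather than trapping:

     __m128i __v = _mm_set_epi32 (3, 2, 1, 0);
     int __x = _mm_extract_epi32 (__v, 2);    -> 2
     __v = _mm_insert_epi32 (__v, 99, 6);     -> 6 & 3 == 2, lane 2 = 99

   Note that _mm_extract_ps returns the raw bits of the float lane,
   not a value converted to integer.  */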
#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
{
  __v16qi __charmask = vec_splats ((signed char) __imm8);
  __charmask = vec_gb (__charmask);
  __v8hu __shortmask = (__v8hu) vec_unpackh (__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve (__shortmask);
#endif
  return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
{
#ifdef _ARCH_PWR10
  return (__m128i) vec_blendv ((__v16qi) __A, (__v16qi) __B, (__v16qu) __mask);
#else
  const __v16qu __seven = vec_splats ((unsigned char) 0x07);
  __v16qu __lmask = vec_sra ((__v16qu) __mask, __seven);
  return (__m128i) vec_sel ((__v16qi) __A, (__v16qi) __B, __lmask);
#endif
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps (__m128 __A, __m128 __B, const int __imm8)
{
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
    };
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu) __B, __pcv[__imm8]);
  return (__m128) __r;
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_ps (__m128 __A, __m128 __B, __m128 __mask)
{
#ifdef _ARCH_PWR10
  return (__m128) vec_blendv ((__v4sf) __A, (__v4sf) __B, (__v4su) __mask);
#else
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt ((__v4si) __mask, __zero);
  return (__m128) vec_sel ((__v4su) __A, (__v4su) __B, (__v4su) __boolmask);
#endif
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd (__m128d __A, __m128d __B, const int __imm8)
{
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }
    };
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu) __B, __pcv[__imm8]);
  return (__m128d) __r;
}

#ifdef _ARCH_PWR8
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
{
#ifdef _ARCH_PWR10
  return (__m128d) vec_blendv ((__v2df) __A, (__v2df) __B, (__v2du) __mask);
#else
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask
    = vec_cmplt ((__v2di) __mask, __zero);
  return (__m128d) vec_sel ((__v2du) __A, (__v2du) __B, (__v2du) __boolmask);
#endif
}
#endif
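
/* Blend semantics in brief: the _mm_blend_* forms select lanes with a
   compile-time immediate (bit i set takes lane i from __B), while the
   _mm_blendv_* forms test the most significant bit of each mask lane
   at run time.  For example, _mm_blend_pd (__a, __b, 0x1) yields
   { __b[0], __a[1] }.  */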
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
  return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))

#define _mm_test_all_ones(V) \
  _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_cmpeq ((__v2di) __X, (__v2di) __Y);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v16qi) __X, (__v16qi) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu16 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v8hu) __X, (__v8hu) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v4si) __X, (__v4si) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v4su) __X, (__v4su) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v16qi) __X, (__v16qi) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu16 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v8hu) __X, (__v8hu) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v4si) __X, (__v4si) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v4su) __X, (__v4su) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_mul ((__v4su) __X, (__v4su) __Y);
}
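
/* The three tests above mirror the x86 PTEST-based predicates without
   modeling condition flags: _mm_testz_si128 is 1 when (__A & __B) is
   all zeros, _mm_testc_si128 is 1 when (~__A & __B) is all zeros
   (i.e. __A has a 1 everywhere __B does), and _mm_testnzc_si128 is 1
   only when neither holds.  Hence _mm_test_all_ones (__v) is 1
   exactly when every bit of __v is set, since its second argument
   (__v compared equal to itself) is all ones.  */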
#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_mule ((__v4si) __X, (__v4si) __Y);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi16 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v16qi) __A);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi32 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v16qi) __A);
  return (__m128i) vec_unpackh ((__v8hi) __A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi64 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v16qi) __A);
  __A = (__m128i) vec_unpackh ((__v8hi) __A);
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi32 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v8hi) __A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi64 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v8hi) __A);
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi64 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi16 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi32 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi64 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi32 (__m128i __A)
{
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi64 (__m128i __A)
{
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_epi64 (__m128i __A)
{
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v4su) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}
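
/* The widening conversions above share one pattern: the signed forms
   sign-extend the low elements of __A with vec_unpackh (applied once
   per doubling of element width), and the unsigned forms zero-extend
   by interleaving with a zero vector on the endian-appropriate side.
   For example, _mm_cvtepi8_epi16 turns a low byte of -1 into the
   halfword -1, while _mm_cvtepu8_epi16 turns the byte 0xff into the
   halfword 0x00ff.  */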
/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minpos_epu16 (__m128i __A)
{
  union __u
    {
      __m128i __m;
      __v8hu __uh;
    };
  union __u __u = { .__m = __A }, __r = { .__m = {0} };
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++)
    {
      if (__u.__uh[__i] < __rmin)
	{
	  __rmin = __u.__uh[__i];
	  __ridx = __i;
	}
    }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_packsu ((__v4si) __X, (__v4si) __Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_cmpgt ((__v2di) __X, (__v2di) __Y);
}
#endif

#endif /* SMMINTRIN_H_ */