/* Copyright (C) 2018-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#endif

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#include <altivec.h>
#include <tmmintrin.h>
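
/* As the porting note above suggests, many Intel intrinsics can be
   expressed directly in GNU C vector extensions.  A minimal sketch
   (the __demo_* name is hypothetical, not part of any API): lane-wise
   32-bit addition with no intrinsic at all, which GCC compiles to a
   single vector add on any target with vector support.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_add_epi32_generic (__m128i __a, __m128i __b)
{
  /* GNU C applies '+' element-wise to vector types.  */
  return (__m128i) ((__v4si) __a + (__v4si) __b);
}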

/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT	0x00
#define _MM_FROUND_TO_ZERO		0x01
#define _MM_FROUND_TO_POS_INF		0x02
#define _MM_FROUND_TO_NEG_INF		0x03
#define _MM_FROUND_CUR_DIRECTION	0x04

#define _MM_FROUND_NINT \
  (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR \
  (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL \
  (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC \
  (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT \
  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT \
  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

#define _MM_FROUND_RAISE_EXC	0x00
#define _MM_FROUND_NO_EXC	0x08

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_pd (__m128d __A, int __rounding)
{
  __v2df __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Save enabled exceptions, disable all exceptions,
         and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
      __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr));
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
      __fpscr_save.__fr = __builtin_mffs ();
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
      __fpscr_save.__fpscr &= ~0xf8;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
#endif
      /* Insert an artificial "read/write" reference to the variable
         read below, to ensure the compiler does not schedule
         a read/use of the variable before the FPSCR is modified, above.
         This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : "+wa" (__A));
    }

  switch (__rounding)
    {
      case _MM_FROUND_TO_NEAREST_INT:
        __fpscr_save.__fr = __builtin_mffsl ();
        __attribute__ ((fallthrough));
      case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
        __builtin_set_fpscr_rn (0b00);
        /* Insert an artificial "read/write" reference to the variable
           read below, to ensure the compiler does not schedule
           a read/use of the variable before the FPSCR is modified, above.
           This can be removed if and when GCC PR102783 is fixed.  */
        __asm__ ("" : "+wa" (__A));

        __r = vec_rint ((__v2df) __A);

        /* Insert an artificial "read" reference to the variable written
           above, to ensure the compiler does not schedule the computation
           of the value after the manipulation of the FPSCR, below.
           This can be removed if and when GCC PR102783 is fixed.  */
        __asm__ ("" : : "wa" (__r));
        __builtin_set_fpscr_rn (__fpscr_save.__fpscr);
        break;
      case _MM_FROUND_TO_NEG_INF:
      case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
        __r = vec_floor ((__v2df) __A);
        break;
      case _MM_FROUND_TO_POS_INF:
      case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
        __r = vec_ceil ((__v2df) __A);
        break;
      case _MM_FROUND_TO_ZERO:
      case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
        __r = vec_trunc ((__v2df) __A);
        break;
      case _MM_FROUND_CUR_DIRECTION:
      case _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC:
        /* The second label covers _MM_FROUND_NEARBYINT, which would
           otherwise match no case and leave __r uninitialized.  */
        __r = vec_rint ((__v2df) __A);
        break;
    }
  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Insert an artificial "read" reference to the variable written
         above, to ensure the compiler does not schedule the computation
         of the value after the manipulation of the FPSCR, below.
         This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : : "wa" (__r));
      /* Restore enabled exceptions.  */
      __fpscr_save.__fr = __builtin_mffsl ();
      __fpscr_save.__fpscr |= __enables_save.__fpscr;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
    }
  return (__m128d) __r;
}
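
/* A usage sketch for _mm_round_pd (the __demo_* helper is
   hypothetical): truncate both double lanes toward zero while
   suppressing floating-point exception traps, as x86 ROUNDPD does
   when the NO_EXC bit is set.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_trunc_quiet (__m128d __v)
{
  return _mm_round_pd (__v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}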

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_sd (__m128d __A, __m128d __B, int __rounding)
{
  __B = _mm_round_pd (__B, __rounding);
  __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] };
  return (__m128d) __r;
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ps (__m128 __A, int __rounding)
{
  __v4sf __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Save enabled exceptions, disable all exceptions,
         and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
      __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr));
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
      __fpscr_save.__fr = __builtin_mffs ();
      __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
      __fpscr_save.__fpscr &= ~0xf8;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
#endif
      /* Insert an artificial "read/write" reference to the variable
         read below, to ensure the compiler does not schedule
         a read/use of the variable before the FPSCR is modified, above.
         This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : "+wa" (__A));
    }

  switch (__rounding)
    {
      case _MM_FROUND_TO_NEAREST_INT:
        __fpscr_save.__fr = __builtin_mffsl ();
        __attribute__ ((fallthrough));
      case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
        __builtin_set_fpscr_rn (0b00);
        /* Insert an artificial "read/write" reference to the variable
           read below, to ensure the compiler does not schedule
           a read/use of the variable before the FPSCR is modified, above.
           This can be removed if and when GCC PR102783 is fixed.  */
        __asm__ ("" : "+wa" (__A));

        __r = vec_rint ((__v4sf) __A);

        /* Insert an artificial "read" reference to the variable written
           above, to ensure the compiler does not schedule the computation
           of the value after the manipulation of the FPSCR, below.
           This can be removed if and when GCC PR102783 is fixed.  */
        __asm__ ("" : : "wa" (__r));
        __builtin_set_fpscr_rn (__fpscr_save.__fpscr);
        break;
      case _MM_FROUND_TO_NEG_INF:
      case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
        __r = vec_floor ((__v4sf) __A);
        break;
      case _MM_FROUND_TO_POS_INF:
      case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
        __r = vec_ceil ((__v4sf) __A);
        break;
      case _MM_FROUND_TO_ZERO:
      case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
        __r = vec_trunc ((__v4sf) __A);
        break;
      case _MM_FROUND_CUR_DIRECTION:
      case _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC:
        /* The second label covers _MM_FROUND_NEARBYINT, which would
           otherwise match no case and leave __r uninitialized.  */
        __r = vec_rint ((__v4sf) __A);
        break;
    }
  if (__rounding & _MM_FROUND_NO_EXC)
    {
      /* Insert an artificial "read" reference to the variable written
         above, to ensure the compiler does not schedule the computation
         of the value after the manipulation of the FPSCR, below.
         This can be removed if and when GCC PR102783 is fixed.  */
      __asm__ ("" : : "wa" (__r));
      /* Restore enabled exceptions.  */
      __fpscr_save.__fr = __builtin_mffsl ();
      __fpscr_save.__fpscr |= __enables_save.__fpscr;
      __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
    }
  return (__m128) __r;
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ss (__m128 __A, __m128 __B, int __rounding)
{
  __B = _mm_round_ps (__B, __rounding);
  __v4sf __r = (__v4sf) __A;
  __r[0] = ((__v4sf) __B)[0];
  return (__m128) __r;
}

#define _mm_ceil_pd(V)	   _mm_round_pd ((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V)	   _mm_round_pd ((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V)	   _mm_round_ps ((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V)	   _mm_round_ps ((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
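
/* A sketch of the scalar-merge convention used by the *_sd forms above
   (the __demo_* helper is hypothetical): _mm_floor_sd floors only
   lane 0 of __v, while lane 1 of the result is taken unchanged from
   __d.  */
extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_floor_lane0 (__m128d __d, __m128d __v)
{
  __m128d __r = _mm_floor_sd (__d, __v);
  /* ((__v2df) __r)[1] is still ((__v2df) __d)[1].  */
  return ((__v2df) __r)[0];
}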

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
{
  __v16qi __result = (__v16qi) __A;

  __result[__N & 0xf] = __D;

  return (__m128i) __result;
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32 (__m128i const __A, int const __D, int const __N)
{
  __v4si __result = (__v4si) __A;

  __result[__N & 3] = __D;

  return (__m128i) __result;
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
{
  __v2di __result = (__v2di) __A;

  __result[__N & 1] = __D;

  return (__m128i) __result;
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
  return (unsigned char) ((__v16qi) __X)[__N & 15];
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32 (__m128i __X, const int __N)
{
  return ((__v4si) __X)[__N & 3];
}

/* Unlike the 8- and 32-bit forms, this must return the full 64-bit
   element; the x86 intrinsic returns a 64-bit integer, so an int
   return would silently truncate it.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi64 (__m128i __X, const int __N)
{
  return ((__v2di) __X)[__N & 1];
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_ps (__m128 __X, const int __N)
{
  return ((__v4si) __X)[__N & 3];
}
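
/* A round-trip sketch (the __demo_* helper is hypothetical): the
   insert and extract forms above mask the index with the lane count
   minus one, so out-of-range indices wrap instead of faulting.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_insert_extract (__m128i __x)
{
  __m128i __t = _mm_insert_epi32 (__x, 42, 5); /* 5 & 3 selects lane 1.  */
  return _mm_extract_epi32 (__t, 1);           /* Always yields 42.  */
}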

#ifdef _ARCH_PWR8
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
{
  __v16qi __charmask = vec_splats ((signed char) __imm8);
  __charmask = vec_gb (__charmask);
  __v8hu __shortmask = (__v8hu) vec_unpackh (__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve (__shortmask);
#endif
  return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask);
}
#endif

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
{
#ifdef _ARCH_PWR10
  return (__m128i) vec_blendv ((__v16qi) __A, (__v16qi) __B, (__v16qu) __mask);
#else
  /* Arithmetic-shift each mask byte right by 7 so that bytes with the
     sign bit set become 0xFF, then select from __B under those bytes.  */
  const __v16qu __seven = vec_splats ((unsigned char) 0x07);
  __v16qu __lmask = vec_sra ((__v16qu) __mask, __seven);
  return (__m128i) vec_sel ((__v16qi) __A, (__v16qi) __B, __lmask);
#endif
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps (__m128 __A, __m128 __B, const int __imm8)
{
  /* Permute-control vectors, indexed by the low four bits of __imm8:
     bit i set selects float lane i from __B (byte indices 16-31), bit
     i clear keeps lane i of __A (byte indices 0-15).  */
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
    };
  /* Mask the index as the hardware does; only bits [3:0] of the
     immediate participate, and this avoids reading past the table.  */
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu) __B,
                          __pcv[__imm8 & 0xf]);
  return (__m128) __r;
}
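
/* A sketch of how the permute-control table above encodes __imm8 (the
   __demo_* helper is hypothetical): 0x5 (0b0101) takes lanes 0 and 2
   from __b and lanes 1 and 3 from __a.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_blend_lanes_0_and_2 (__m128 __a, __m128 __b)
{
  return _mm_blend_ps (__a, __b, 0x5);
}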

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_ps (__m128 __A, __m128 __B, __m128 __mask)
{
#ifdef _ARCH_PWR10
  return (__m128) vec_blendv ((__v4sf) __A, (__v4sf) __B, (__v4su) __mask);
#else
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt ((__v4si) __mask, __zero);
  return (__m128) vec_sel ((__v4su) __A, (__v4su) __B, (__v4su) __boolmask);
#endif
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd (__m128d __A, __m128d __B, const int __imm8)
{
  /* Permute-control vectors, indexed by the low two bits of __imm8:
     bit i set selects double lane i from __B.  */
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }
    };
  /* Only bits [1:0] of the immediate participate; masking avoids
     reading past the table.  */
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu) __B,
                          __pcv[__imm8 & 0x3]);
  return (__m128d) __r;
}

#ifdef _ARCH_PWR8
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
{
#ifdef _ARCH_PWR10
  return (__m128d) vec_blendv ((__v2df) __A, (__v2df) __B, (__v2du) __mask);
#else
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask
    = vec_cmplt ((__v2di) __mask, __zero);
  return (__m128d) vec_sel ((__v2du) __A, (__v2du) __B, (__v2du) __boolmask);
#endif
}
#endif

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
  return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))

#define _mm_test_all_ones(V) \
  _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
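
/* A usage sketch (the __demo_* helper is hypothetical): unlike the x86
   PTEST forms, the helpers above return plain ints rather than setting
   ZF/CF, so they are used directly as booleans.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_is_all_ones (__m128i __v)
{
  /* 1 iff every bit of __v is set.  */
  return _mm_test_all_ones (__v);
}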

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_cmpeq ((__v2di) __X, (__v2di) __Y);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v16qi) __X, (__v16qi) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu16 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v8hu) __X, (__v8hu) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v4si) __X, (__v4si) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v4su) __X, (__v4su) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v16qi) __X, (__v16qi) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu16 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v8hu) __X, (__v8hu) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v4si) __X, (__v4si) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v4su) __X, (__v4su) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_mul ((__v4su) __X, (__v4su) __Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_mule ((__v4si) __X, (__v4si) __Y);
}
#endif
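
/* A sketch contrasting the two multiplies above (the __demo_* helper
   is hypothetical): _mm_mullo_epi32 keeps the low 32 bits of all four
   lane products, while _mm_mul_epi32 forms two full 64-bit products
   from the low 32-bit half of each 64-bit lane, so the squares below
   cannot be silently truncated.  */
#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_widening_squares (__m128i __x)
{
  return _mm_mul_epi32 (__x, __x);
}
#endif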

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi16 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v16qi) __A);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi32 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v16qi) __A);
  return (__m128i) vec_unpackh ((__v8hi) __A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi64 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v16qi) __A);
  __A = (__m128i) vec_unpackh ((__v8hi) __A);
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi32 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v8hi) __A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi64 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v8hi) __A);
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi64 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi16 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi32 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi64 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi32 (__m128i __A)
{
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi64 (__m128i __A)
{
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_epi64 (__m128i __A)
{
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v4su) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}
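
/* A sketch of the widening conversions above (the __demo_* helper is
   hypothetical): the _mm_cvtepu* family zero-extends by merging with a
   zero vector, whereas the _mm_cvtepi* family sign-extends via
   vec_unpackh.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_zext_low_words (__m128i __x)
{
  /* The four low 16-bit lanes become 32-bit lanes with zeroed high
     halves.  */
  return _mm_cvtepu16_epi32 (__x);
}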

/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minpos_epu16 (__m128i __A)
{
  union __u
    {
      __m128i __m;
      __v8hu __uh;
    };
  union __u __u = { .__m = __A }, __r = { .__m = {0} };
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++)
    {
      if (__u.__uh[__i] < __rmin)
        {
          __rmin = __u.__uh[__i];
          __ridx = __i;
        }
    }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}
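
/* A usage sketch (the __demo_* helper is hypothetical): unpack the
   packed result of _mm_minpos_epu16, whose minimum sits in bits [15:0]
   and whose index sits in bits [18:16].  */
extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__demo_min_and_index_u16 (__m128i __v)
{
  __m128i __r = _mm_minpos_epu16 (__v);
  unsigned short __min = _mm_extract_epi16 (__r, 0); /* Bits [15:0].  */
  unsigned short __idx = _mm_extract_epi16 (__r, 1); /* Bits [18:16].  */
  return ((unsigned int) __idx << 16) | __min;
}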

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_packsu ((__v4si) __X, (__v4si) __Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_cmpgt ((__v2di) __X, (__v2di) __Y);
}
#endif

#endif /* SMMINTRIN_H_ */