/* GCC rs6000 port of <smmintrin.h>: SSE4.1 intrinsics implemented with PowerPC/AltiVec operations.  */
      1  1.1.1.3  mrg /* Copyright (C) 2018-2022 Free Software Foundation, Inc.
      2      1.1  mrg 
      3      1.1  mrg    This file is part of GCC.
      4      1.1  mrg 
      5      1.1  mrg    GCC is free software; you can redistribute it and/or modify
      6      1.1  mrg    it under the terms of the GNU General Public License as published by
      7      1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      8      1.1  mrg    any later version.
      9      1.1  mrg 
     10      1.1  mrg    GCC is distributed in the hope that it will be useful,
     11      1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12      1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13      1.1  mrg    GNU General Public License for more details.
     14      1.1  mrg 
     15      1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     16      1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     17      1.1  mrg    3.1, as published by the Free Software Foundation.
     18      1.1  mrg 
     19      1.1  mrg    You should have received a copy of the GNU General Public License and
     20      1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     21      1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22      1.1  mrg    <http://www.gnu.org/licenses/>.  */
     23      1.1  mrg 
     24      1.1  mrg /* Implemented from the specification included in the Intel C++ Compiler
     25      1.1  mrg    User Guide and Reference, version 9.0.
     26      1.1  mrg 
     27      1.1  mrg    NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */
     28      1.1  mrg 
     29      1.1  mrg #ifndef NO_WARN_X86_INTRINSICS
     30      1.1  mrg /* This header is distributed to simplify porting x86_64 code that
     31      1.1  mrg    makes explicit use of Intel intrinsics to powerpc64le.
     32      1.1  mrg    It is the user's responsibility to determine if the results are
     33      1.1  mrg    acceptable and make additional changes as necessary.
     34      1.1  mrg    Note that much code that uses Intel intrinsics can be rewritten in
     35      1.1  mrg    standard C or GNU C extensions, which are more portable and better
     36      1.1  mrg    optimized across multiple targets.  */
     37      1.1  mrg #endif
     38      1.1  mrg 
     39      1.1  mrg #ifndef SMMINTRIN_H_
     40      1.1  mrg #define SMMINTRIN_H_
     41      1.1  mrg 
     42      1.1  mrg #include <altivec.h>
     43      1.1  mrg #include <tmmintrin.h>
     44      1.1  mrg 
     45  1.1.1.3  mrg /* Rounding mode macros. */
     46  1.1.1.3  mrg #define _MM_FROUND_TO_NEAREST_INT       0x00
     47  1.1.1.3  mrg #define _MM_FROUND_TO_ZERO              0x01
     48  1.1.1.3  mrg #define _MM_FROUND_TO_POS_INF           0x02
     49  1.1.1.3  mrg #define _MM_FROUND_TO_NEG_INF           0x03
     50  1.1.1.3  mrg #define _MM_FROUND_CUR_DIRECTION        0x04
     51  1.1.1.3  mrg 
     52  1.1.1.3  mrg #define _MM_FROUND_NINT		\
     53  1.1.1.3  mrg   (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
     54  1.1.1.3  mrg #define _MM_FROUND_FLOOR	\
     55  1.1.1.3  mrg   (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
     56  1.1.1.3  mrg #define _MM_FROUND_CEIL		\
     57  1.1.1.3  mrg   (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
     58  1.1.1.3  mrg #define _MM_FROUND_TRUNC	\
     59  1.1.1.3  mrg   (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
     60  1.1.1.3  mrg #define _MM_FROUND_RINT		\
     61  1.1.1.3  mrg   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
     62  1.1.1.3  mrg #define _MM_FROUND_NEARBYINT	\
     63  1.1.1.3  mrg   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
     64  1.1.1.3  mrg 
     65  1.1.1.3  mrg #define _MM_FROUND_RAISE_EXC            0x00
     66  1.1.1.3  mrg #define _MM_FROUND_NO_EXC               0x08
     67  1.1.1.3  mrg 
     68  1.1.1.3  mrg extern __inline __m128d
     69  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     70  1.1.1.3  mrg _mm_round_pd (__m128d __A, int __rounding)
     71  1.1.1.3  mrg {
     72  1.1.1.3  mrg   __v2df __r;
     73  1.1.1.3  mrg   union {
     74  1.1.1.3  mrg     double __fr;
     75  1.1.1.3  mrg     long long __fpscr;
     76  1.1.1.3  mrg   } __enables_save, __fpscr_save;
     77  1.1.1.3  mrg 
     78  1.1.1.3  mrg   if (__rounding & _MM_FROUND_NO_EXC)
     79  1.1.1.3  mrg     {
     80  1.1.1.3  mrg       /* Save enabled exceptions, disable all exceptions,
     81  1.1.1.3  mrg 	 and preserve the rounding mode.  */
     82  1.1.1.3  mrg #ifdef _ARCH_PWR9
     83  1.1.1.3  mrg       __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr));
     84  1.1.1.3  mrg       __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
     85  1.1.1.3  mrg #else
     86  1.1.1.3  mrg       __fpscr_save.__fr = __builtin_mffs ();
     87  1.1.1.3  mrg       __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
     88  1.1.1.3  mrg       __fpscr_save.__fpscr &= ~0xf8;
     89  1.1.1.3  mrg       __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
     90  1.1.1.3  mrg #endif
     91  1.1.1.3  mrg       /* Insert an artificial "read/write" reference to the variable
     92  1.1.1.3  mrg 	 read below, to ensure the compiler does not schedule
     93  1.1.1.3  mrg 	 a read/use of the variable before the FPSCR is modified, above.
     94  1.1.1.3  mrg 	 This can be removed if and when GCC PR102783 is fixed.
     95  1.1.1.3  mrg        */
     96  1.1.1.3  mrg       __asm__ ("" : "+wa" (__A));
     97  1.1.1.3  mrg     }
     98  1.1.1.3  mrg 
     99  1.1.1.3  mrg   switch (__rounding)
    100  1.1.1.3  mrg     {
    101  1.1.1.3  mrg       case _MM_FROUND_TO_NEAREST_INT:
    102  1.1.1.3  mrg 	__fpscr_save.__fr = __builtin_mffsl ();
    103  1.1.1.3  mrg 	__attribute__ ((fallthrough));
    104  1.1.1.3  mrg       case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    105  1.1.1.3  mrg 	__builtin_set_fpscr_rn (0b00);
    106  1.1.1.3  mrg 	/* Insert an artificial "read/write" reference to the variable
    107  1.1.1.3  mrg 	   read below, to ensure the compiler does not schedule
    108  1.1.1.3  mrg 	   a read/use of the variable before the FPSCR is modified, above.
    109  1.1.1.3  mrg 	   This can be removed if and when GCC PR102783 is fixed.
    110  1.1.1.3  mrg 	 */
    111  1.1.1.3  mrg 	__asm__ ("" : "+wa" (__A));
    112  1.1.1.3  mrg 
    113  1.1.1.3  mrg 	__r = vec_rint ((__v2df) __A);
    114  1.1.1.3  mrg 
    115  1.1.1.3  mrg 	/* Insert an artificial "read" reference to the variable written
    116  1.1.1.3  mrg 	   above, to ensure the compiler does not schedule the computation
    117  1.1.1.3  mrg 	   of the value after the manipulation of the FPSCR, below.
    118  1.1.1.3  mrg 	   This can be removed if and when GCC PR102783 is fixed.
    119  1.1.1.3  mrg 	 */
    120  1.1.1.3  mrg 	__asm__ ("" : : "wa" (__r));
    121  1.1.1.3  mrg 	__builtin_set_fpscr_rn (__fpscr_save.__fpscr);
    122  1.1.1.3  mrg 	break;
    123  1.1.1.3  mrg       case _MM_FROUND_TO_NEG_INF:
    124  1.1.1.3  mrg       case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    125  1.1.1.3  mrg 	__r = vec_floor ((__v2df) __A);
    126  1.1.1.3  mrg 	break;
    127  1.1.1.3  mrg       case _MM_FROUND_TO_POS_INF:
    128  1.1.1.3  mrg       case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    129  1.1.1.3  mrg 	__r = vec_ceil ((__v2df) __A);
    130  1.1.1.3  mrg 	break;
    131  1.1.1.3  mrg       case _MM_FROUND_TO_ZERO:
    132  1.1.1.3  mrg       case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    133  1.1.1.3  mrg 	__r = vec_trunc ((__v2df) __A);
    134  1.1.1.3  mrg 	break;
    135  1.1.1.3  mrg       case _MM_FROUND_CUR_DIRECTION:
    136  1.1.1.3  mrg 	__r = vec_rint ((__v2df) __A);
    137  1.1.1.3  mrg 	break;
    138  1.1.1.3  mrg     }
    139  1.1.1.3  mrg   if (__rounding & _MM_FROUND_NO_EXC)
    140  1.1.1.3  mrg     {
    141  1.1.1.3  mrg       /* Insert an artificial "read" reference to the variable written
    142  1.1.1.3  mrg 	 above, to ensure the compiler does not schedule the computation
    143  1.1.1.3  mrg 	 of the value after the manipulation of the FPSCR, below.
    144  1.1.1.3  mrg 	 This can be removed if and when GCC PR102783 is fixed.
    145  1.1.1.3  mrg        */
    146  1.1.1.3  mrg       __asm__ ("" : : "wa" (__r));
    147  1.1.1.3  mrg       /* Restore enabled exceptions.  */
    148  1.1.1.3  mrg       __fpscr_save.__fr = __builtin_mffsl ();
    149  1.1.1.3  mrg       __fpscr_save.__fpscr |= __enables_save.__fpscr;
    150  1.1.1.3  mrg       __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
    151  1.1.1.3  mrg     }
    152  1.1.1.3  mrg   return (__m128d) __r;
    153  1.1.1.3  mrg }
    154  1.1.1.3  mrg 
    155  1.1.1.3  mrg extern __inline __m128d
    156  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    157  1.1.1.3  mrg _mm_round_sd (__m128d __A, __m128d __B, int __rounding)
    158  1.1.1.3  mrg {
    159  1.1.1.3  mrg   __B = _mm_round_pd (__B, __rounding);
    160  1.1.1.3  mrg   __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] };
    161  1.1.1.3  mrg   return (__m128d) __r;
    162  1.1.1.3  mrg }
    163  1.1.1.3  mrg 
    164  1.1.1.3  mrg extern __inline __m128
    165  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    166  1.1.1.3  mrg _mm_round_ps (__m128 __A, int __rounding)
    167  1.1.1.3  mrg {
    168  1.1.1.3  mrg   __v4sf __r;
    169  1.1.1.3  mrg   union {
    170  1.1.1.3  mrg     double __fr;
    171  1.1.1.3  mrg     long long __fpscr;
    172  1.1.1.3  mrg   } __enables_save, __fpscr_save;
    173  1.1.1.3  mrg 
    174  1.1.1.3  mrg   if (__rounding & _MM_FROUND_NO_EXC)
    175  1.1.1.3  mrg     {
    176  1.1.1.3  mrg       /* Save enabled exceptions, disable all exceptions,
    177  1.1.1.3  mrg 	 and preserve the rounding mode.  */
    178  1.1.1.3  mrg #ifdef _ARCH_PWR9
    179  1.1.1.3  mrg       __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr));
    180  1.1.1.3  mrg       __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    181  1.1.1.3  mrg #else
    182  1.1.1.3  mrg       __fpscr_save.__fr = __builtin_mffs ();
    183  1.1.1.3  mrg       __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    184  1.1.1.3  mrg       __fpscr_save.__fpscr &= ~0xf8;
    185  1.1.1.3  mrg       __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
    186  1.1.1.3  mrg #endif
    187  1.1.1.3  mrg       /* Insert an artificial "read/write" reference to the variable
    188  1.1.1.3  mrg 	 read below, to ensure the compiler does not schedule
    189  1.1.1.3  mrg 	 a read/use of the variable before the FPSCR is modified, above.
    190  1.1.1.3  mrg 	 This can be removed if and when GCC PR102783 is fixed.
    191  1.1.1.3  mrg        */
    192  1.1.1.3  mrg       __asm__ ("" : "+wa" (__A));
    193  1.1.1.3  mrg     }
    194  1.1.1.3  mrg 
    195  1.1.1.3  mrg   switch (__rounding)
    196  1.1.1.3  mrg     {
    197  1.1.1.3  mrg       case _MM_FROUND_TO_NEAREST_INT:
    198  1.1.1.3  mrg 	__fpscr_save.__fr = __builtin_mffsl ();
    199  1.1.1.3  mrg 	__attribute__ ((fallthrough));
    200  1.1.1.3  mrg       case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    201  1.1.1.3  mrg 	__builtin_set_fpscr_rn (0b00);
    202  1.1.1.3  mrg 	/* Insert an artificial "read/write" reference to the variable
    203  1.1.1.3  mrg 	   read below, to ensure the compiler does not schedule
    204  1.1.1.3  mrg 	   a read/use of the variable before the FPSCR is modified, above.
    205  1.1.1.3  mrg 	   This can be removed if and when GCC PR102783 is fixed.
    206  1.1.1.3  mrg 	 */
    207  1.1.1.3  mrg 	__asm__ ("" : "+wa" (__A));
    208  1.1.1.3  mrg 
    209  1.1.1.3  mrg 	__r = vec_rint ((__v4sf) __A);
    210  1.1.1.3  mrg 
    211  1.1.1.3  mrg 	/* Insert an artificial "read" reference to the variable written
    212  1.1.1.3  mrg 	   above, to ensure the compiler does not schedule the computation
    213  1.1.1.3  mrg 	   of the value after the manipulation of the FPSCR, below.
    214  1.1.1.3  mrg 	   This can be removed if and when GCC PR102783 is fixed.
    215  1.1.1.3  mrg 	 */
    216  1.1.1.3  mrg 	__asm__ ("" : : "wa" (__r));
    217  1.1.1.3  mrg 	__builtin_set_fpscr_rn (__fpscr_save.__fpscr);
    218  1.1.1.3  mrg 	break;
    219  1.1.1.3  mrg       case _MM_FROUND_TO_NEG_INF:
    220  1.1.1.3  mrg       case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    221  1.1.1.3  mrg 	__r = vec_floor ((__v4sf) __A);
    222  1.1.1.3  mrg 	break;
    223  1.1.1.3  mrg       case _MM_FROUND_TO_POS_INF:
    224  1.1.1.3  mrg       case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    225  1.1.1.3  mrg 	__r = vec_ceil ((__v4sf) __A);
    226  1.1.1.3  mrg 	break;
    227  1.1.1.3  mrg       case _MM_FROUND_TO_ZERO:
    228  1.1.1.3  mrg       case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    229  1.1.1.3  mrg 	__r = vec_trunc ((__v4sf) __A);
    230  1.1.1.3  mrg 	break;
    231  1.1.1.3  mrg       case _MM_FROUND_CUR_DIRECTION:
    232  1.1.1.3  mrg 	__r = vec_rint ((__v4sf) __A);
    233  1.1.1.3  mrg 	break;
    234  1.1.1.3  mrg     }
    235  1.1.1.3  mrg   if (__rounding & _MM_FROUND_NO_EXC)
    236  1.1.1.3  mrg     {
    237  1.1.1.3  mrg       /* Insert an artificial "read" reference to the variable written
    238  1.1.1.3  mrg 	 above, to ensure the compiler does not schedule the computation
    239  1.1.1.3  mrg 	 of the value after the manipulation of the FPSCR, below.
    240  1.1.1.3  mrg 	 This can be removed if and when GCC PR102783 is fixed.
    241  1.1.1.3  mrg        */
    242  1.1.1.3  mrg       __asm__ ("" : : "wa" (__r));
    243  1.1.1.3  mrg       /* Restore enabled exceptions.  */
    244  1.1.1.3  mrg       __fpscr_save.__fr = __builtin_mffsl ();
    245  1.1.1.3  mrg       __fpscr_save.__fpscr |= __enables_save.__fpscr;
    246  1.1.1.3  mrg       __builtin_mtfsf (0b00000011, __fpscr_save.__fr);
    247  1.1.1.3  mrg     }
    248  1.1.1.3  mrg   return (__m128) __r;
    249  1.1.1.3  mrg }
    250  1.1.1.3  mrg 
    251  1.1.1.3  mrg extern __inline __m128
    252  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    253  1.1.1.3  mrg _mm_round_ss (__m128 __A, __m128 __B, int __rounding)
    254  1.1.1.3  mrg {
    255  1.1.1.3  mrg   __B = _mm_round_ps (__B, __rounding);
    256  1.1.1.3  mrg   __v4sf __r = (__v4sf) __A;
    257  1.1.1.3  mrg   __r[0] = ((__v4sf) __B)[0];
    258  1.1.1.3  mrg   return (__m128) __r;
    259  1.1.1.3  mrg }
    260  1.1.1.3  mrg 
    261  1.1.1.3  mrg #define _mm_ceil_pd(V)	   _mm_round_pd ((V), _MM_FROUND_CEIL)
    262  1.1.1.3  mrg #define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
    263  1.1.1.3  mrg 
    264  1.1.1.3  mrg #define _mm_floor_pd(V)	   _mm_round_pd((V), _MM_FROUND_FLOOR)
    265  1.1.1.3  mrg #define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
    266  1.1.1.3  mrg 
    267  1.1.1.3  mrg #define _mm_ceil_ps(V)	   _mm_round_ps ((V), _MM_FROUND_CEIL)
    268  1.1.1.3  mrg #define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
    269  1.1.1.3  mrg 
    270  1.1.1.3  mrg #define _mm_floor_ps(V)	   _mm_round_ps ((V), _MM_FROUND_FLOOR)
    271  1.1.1.3  mrg #define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
    272  1.1.1.3  mrg 
    273  1.1.1.3  mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    274  1.1.1.3  mrg _mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
    275  1.1.1.3  mrg {
    276  1.1.1.3  mrg   __v16qi __result = (__v16qi)__A;
    277  1.1.1.3  mrg 
    278  1.1.1.3  mrg   __result [__N & 0xf] = __D;
    279  1.1.1.3  mrg 
    280  1.1.1.3  mrg   return (__m128i) __result;
    281  1.1.1.3  mrg }
    282  1.1.1.3  mrg 
    283  1.1.1.3  mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    284  1.1.1.3  mrg _mm_insert_epi32 (__m128i const __A, int const __D, int const __N)
    285  1.1.1.3  mrg {
    286  1.1.1.3  mrg   __v4si __result = (__v4si)__A;
    287  1.1.1.3  mrg 
    288  1.1.1.3  mrg   __result [__N & 3] = __D;
    289  1.1.1.3  mrg 
    290  1.1.1.3  mrg   return (__m128i) __result;
    291  1.1.1.3  mrg }
    292  1.1.1.3  mrg 
    293  1.1.1.3  mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    294  1.1.1.3  mrg _mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
    295  1.1.1.3  mrg {
    296  1.1.1.3  mrg   __v2di __result = (__v2di)__A;
    297  1.1.1.3  mrg 
    298  1.1.1.3  mrg   __result [__N & 1] = __D;
    299  1.1.1.3  mrg 
    300  1.1.1.3  mrg   return (__m128i) __result;
    301  1.1.1.3  mrg }
    302  1.1.1.3  mrg 
    303      1.1  mrg extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    304      1.1  mrg _mm_extract_epi8 (__m128i __X, const int __N)
    305      1.1  mrg {
    306      1.1  mrg   return (unsigned char) ((__v16qi)__X)[__N & 15];
    307      1.1  mrg }
    308      1.1  mrg 
    309      1.1  mrg extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    310      1.1  mrg _mm_extract_epi32 (__m128i __X, const int __N)
    311      1.1  mrg {
    312      1.1  mrg   return ((__v4si)__X)[__N & 3];
    313      1.1  mrg }
    314      1.1  mrg 
    315      1.1  mrg extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    316      1.1  mrg _mm_extract_epi64 (__m128i __X, const int __N)
    317      1.1  mrg {
    318      1.1  mrg   return ((__v2di)__X)[__N & 1];
    319      1.1  mrg }
    320      1.1  mrg 
    321      1.1  mrg extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    322      1.1  mrg _mm_extract_ps (__m128 __X, const int __N)
    323      1.1  mrg {
    324      1.1  mrg   return ((__v4si)__X)[__N & 3];
    325      1.1  mrg }
    326      1.1  mrg 
#ifdef _ARCH_PWR8
/* Select 16-bit fields from __A or __B: result field i comes from __B
   when bit i of the immediate __imm8 is set, else from __A.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
{
  /* Splat the immediate, then gather-bits expands each bit of it into
     a full 0x00/0xff byte; unpacking sign-extends those bytes into
     per-halfword select masks.  */
  __v16qi __bytemask = vec_gb (vec_splats ((signed char) __imm8));
  __v8hu __halfmask = (__v8hu) vec_unpackh (__bytemask);
  #ifdef __BIG_ENDIAN__
  /* Mask bits map to elements in reverse order on big-endian.  */
  __halfmask = vec_reve (__halfmask);
  #endif
  return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __halfmask);
}
#endif
    340      1.1  mrg 
    341      1.1  mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    342      1.1  mrg _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
    343      1.1  mrg {
    344  1.1.1.3  mrg #ifdef _ARCH_PWR10
    345  1.1.1.3  mrg   return (__m128i) vec_blendv ((__v16qi) __A, (__v16qi) __B, (__v16qu) __mask);
    346  1.1.1.3  mrg #else
    347      1.1  mrg   const __v16qu __seven = vec_splats ((unsigned char) 0x07);
    348      1.1  mrg   __v16qu __lmask = vec_sra ((__v16qu) __mask, __seven);
    349  1.1.1.3  mrg   return (__m128i) vec_sel ((__v16qi) __A, (__v16qi) __B, __lmask);
    350  1.1.1.3  mrg #endif
    351  1.1.1.3  mrg }
    352  1.1.1.3  mrg 
/* Select 32-bit fields from __A or __B: result field i comes from __B
   when bit i of the 4-bit immediate __imm8 is set, else from __A
   (Intel _mm_blend_ps semantics).
   NOTE(review): __imm8 is used to index a 16-entry table, so it is
   assumed to be in 0..15 — confirm callers pass only 4-bit values.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps (__m128 __A, __m128 __B, const int __imm8)
{
  /* Permute-control vectors, one per immediate value.  In vec_perm's
     numbering over the concatenated 32-byte input, byte indices 0-15
     select bytes of __A and 16-31 select bytes of __B; each row swaps
     in the 4-byte groups of __B named by the set bits of its index.  */
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
    };
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128) __r;
}
    379  1.1.1.3  mrg 
    380  1.1.1.3  mrg extern __inline __m128
    381  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    382  1.1.1.3  mrg _mm_blendv_ps (__m128 __A, __m128 __B, __m128 __mask)
    383  1.1.1.3  mrg {
    384  1.1.1.3  mrg #ifdef _ARCH_PWR10
    385  1.1.1.3  mrg   return (__m128) vec_blendv ((__v4sf) __A, (__v4sf) __B, (__v4su) __mask);
    386  1.1.1.3  mrg #else
    387  1.1.1.3  mrg   const __v4si __zero = {0};
    388  1.1.1.3  mrg   const __vector __bool int __boolmask = vec_cmplt ((__v4si) __mask, __zero);
    389  1.1.1.3  mrg   return (__m128) vec_sel ((__v4su) __A, (__v4su) __B, (__v4su) __boolmask);
    390  1.1.1.3  mrg #endif
    391  1.1.1.3  mrg }
    392  1.1.1.3  mrg 
/* Select 64-bit fields from __A or __B: result field i comes from __B
   when bit i of the 2-bit immediate __imm8 is set, else from __A
   (Intel _mm_blend_pd semantics).
   NOTE(review): __imm8 indexes a 4-entry table, so it is assumed to be
   in 0..3 — confirm callers pass only 2-bit values.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd (__m128d __A, __m128d __B, const int __imm8)
{
  /* Permute-control vectors, one per immediate value.  In vec_perm's
     numbering, byte indices 0-15 select bytes of __A and 16-31 select
     bytes of __B; each row swaps in the 8-byte halves of __B named by
     the set bits of its index.  */
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }
    };
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128d) __r;
}
    407  1.1.1.3  mrg 
#ifdef _ARCH_PWR8
/* Per-element variable blend: take the double from __B where the
   corresponding element of __mask has its sign bit set, else from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
{
#ifdef _ARCH_PWR10
  return (__m128d) vec_blendv ((__v2df) __A, (__v2df) __B, (__v2du) __mask);
#else
  /* The sign bit is set exactly when the element, viewed as a signed
     long long, is negative; the compare yields a full-width mask.  */
  const __v2di __nil = {0};
  __vector __bool long long __negative = vec_cmplt ((__v2di) __mask, __nil);
  return (__m128d) vec_sel ((__v2du) __A, (__v2du) __B, (__v2du) __negative);
#endif
}
#endif
    422  1.1.1.3  mrg 
    423  1.1.1.3  mrg extern __inline int
    424  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    425  1.1.1.3  mrg _mm_testz_si128 (__m128i __A, __m128i __B)
    426  1.1.1.3  mrg {
    427  1.1.1.3  mrg   /* Note: This implementation does NOT set "zero" or "carry" flags.  */
    428  1.1.1.3  mrg   const __v16qu __zero = {0};
    429  1.1.1.3  mrg   return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero);
    430  1.1.1.3  mrg }
    431  1.1.1.3  mrg 
    432  1.1.1.3  mrg extern __inline int
    433  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    434  1.1.1.3  mrg _mm_testc_si128 (__m128i __A, __m128i __B)
    435  1.1.1.3  mrg {
    436  1.1.1.3  mrg   /* Note: This implementation does NOT set "zero" or "carry" flags.  */
    437  1.1.1.3  mrg   const __v16qu __zero = {0};
    438  1.1.1.3  mrg   const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
    439  1.1.1.3  mrg   return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero);
    440  1.1.1.3  mrg }
    441  1.1.1.3  mrg 
    442  1.1.1.3  mrg extern __inline int
    443  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    444  1.1.1.3  mrg _mm_testnzc_si128 (__m128i __A, __m128i __B)
    445  1.1.1.3  mrg {
    446  1.1.1.3  mrg   /* Note: This implementation does NOT set "zero" or "carry" flags.  */
    447  1.1.1.3  mrg   return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0;
    448  1.1.1.3  mrg }
    449  1.1.1.3  mrg 
    450  1.1.1.3  mrg #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
    451  1.1.1.3  mrg 
    452  1.1.1.3  mrg #define _mm_test_all_ones(V) \
    453  1.1.1.3  mrg   _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
    454  1.1.1.3  mrg 
    455  1.1.1.3  mrg #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
    456  1.1.1.3  mrg 
#ifdef _ARCH_PWR8
/* Compare 64-bit elements for equality, yielding all-ones or all-zeros
   per element.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
  __v2di __lhs = (__v2di) __X;
  __v2di __rhs = (__v2di) __Y;
  return (__m128i) vec_cmpeq (__lhs, __rhs);
}
#endif
    465  1.1.1.3  mrg 
    466  1.1.1.3  mrg extern __inline __m128i
    467  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    468  1.1.1.3  mrg _mm_min_epi8 (__m128i __X, __m128i __Y)
    469  1.1.1.3  mrg {
    470  1.1.1.3  mrg   return (__m128i) vec_min ((__v16qi)__X, (__v16qi)__Y);
    471  1.1.1.3  mrg }
    472  1.1.1.3  mrg 
    473  1.1.1.3  mrg extern __inline __m128i
    474  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    475  1.1.1.3  mrg _mm_min_epu16 (__m128i __X, __m128i __Y)
    476  1.1.1.3  mrg {
    477  1.1.1.3  mrg   return (__m128i) vec_min ((__v8hu)__X, (__v8hu)__Y);
    478      1.1  mrg }
    479      1.1  mrg 
    480  1.1.1.3  mrg extern __inline __m128i
    481  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    482  1.1.1.3  mrg _mm_min_epi32 (__m128i __X, __m128i __Y)
    483  1.1.1.3  mrg {
    484  1.1.1.3  mrg   return (__m128i) vec_min ((__v4si)__X, (__v4si)__Y);
    485  1.1.1.3  mrg }
    486  1.1.1.3  mrg 
    487  1.1.1.3  mrg extern __inline __m128i
    488  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    489  1.1.1.3  mrg _mm_min_epu32 (__m128i __X, __m128i __Y)
    490  1.1.1.3  mrg {
    491  1.1.1.3  mrg   return (__m128i) vec_min ((__v4su)__X, (__v4su)__Y);
    492  1.1.1.3  mrg }
    493  1.1.1.3  mrg 
    494  1.1.1.3  mrg extern __inline __m128i
    495  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    496  1.1.1.3  mrg _mm_max_epi8 (__m128i __X, __m128i __Y)
    497  1.1.1.3  mrg {
    498  1.1.1.3  mrg   return (__m128i) vec_max ((__v16qi)__X, (__v16qi)__Y);
    499  1.1.1.3  mrg }
    500  1.1.1.3  mrg 
    501  1.1.1.3  mrg extern __inline __m128i
    502  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    503  1.1.1.3  mrg _mm_max_epu16 (__m128i __X, __m128i __Y)
    504  1.1.1.3  mrg {
    505  1.1.1.3  mrg   return (__m128i) vec_max ((__v8hu)__X, (__v8hu)__Y);
    506  1.1.1.3  mrg }
    507  1.1.1.3  mrg 
    508  1.1.1.3  mrg extern __inline __m128i
    509  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    510  1.1.1.3  mrg _mm_max_epi32 (__m128i __X, __m128i __Y)
    511  1.1.1.3  mrg {
    512  1.1.1.3  mrg   return (__m128i) vec_max ((__v4si)__X, (__v4si)__Y);
    513  1.1.1.3  mrg }
    514  1.1.1.3  mrg 
    515  1.1.1.3  mrg extern __inline __m128i
    516  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    517  1.1.1.3  mrg _mm_max_epu32 (__m128i __X, __m128i __Y)
    518  1.1.1.3  mrg {
    519  1.1.1.3  mrg   return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
    520  1.1.1.3  mrg }
    521  1.1.1.3  mrg 
    522  1.1.1.3  mrg extern __inline __m128i
    523  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    524  1.1.1.3  mrg _mm_mullo_epi32 (__m128i __X, __m128i __Y)
    525  1.1.1.3  mrg {
    526  1.1.1.3  mrg   return (__m128i) vec_mul ((__v4su) __X, (__v4su) __Y);
    527  1.1.1.3  mrg }
    528  1.1.1.3  mrg 
#ifdef _ARCH_PWR8
/* Signed widening multiply: multiplies the even 32-bit elements,
   producing two 64-bit products.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
  __v4si __lhs = (__v4si) __X;
  __v4si __rhs = (__v4si) __Y;
  return (__m128i) vec_mule (__lhs, __rhs);
}
#endif
    537  1.1.1.3  mrg 
    538  1.1.1.3  mrg extern __inline __m128i
    539  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    540  1.1.1.3  mrg _mm_cvtepi8_epi16 (__m128i __A)
    541  1.1.1.3  mrg {
    542  1.1.1.3  mrg   return (__m128i) vec_unpackh ((__v16qi) __A);
    543  1.1.1.3  mrg }
    544  1.1.1.3  mrg 
    545  1.1.1.3  mrg extern __inline __m128i
    546  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    547  1.1.1.3  mrg _mm_cvtepi8_epi32 (__m128i __A)
    548  1.1.1.3  mrg {
    549  1.1.1.3  mrg   __A = (__m128i) vec_unpackh ((__v16qi) __A);
    550  1.1.1.3  mrg   return (__m128i) vec_unpackh ((__v8hi) __A);
    551  1.1.1.3  mrg }
    552  1.1.1.3  mrg 
#ifdef _ARCH_PWR8
/* Sign-extend the low two signed bytes of __A to doublewords.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi64 (__m128i __A)
{
  /* Widen in three steps: bytes -> halfwords -> words -> doublewords.  */
  __v8hi __halves = vec_unpackh ((__v16qi) __A);
  __v4si __words = vec_unpackh (__halves);
  return (__m128i) vec_unpackh (__words);
}
#endif
    563  1.1.1.3  mrg 
    564  1.1.1.3  mrg extern __inline __m128i
    565  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    566  1.1.1.3  mrg _mm_cvtepi16_epi32 (__m128i __A)
    567  1.1.1.3  mrg {
    568  1.1.1.3  mrg   return (__m128i) vec_unpackh ((__v8hi) __A);
    569  1.1.1.3  mrg }
    570  1.1.1.3  mrg 
#ifdef _ARCH_PWR8
/* Sign-extend the low two signed halfwords of __A to doublewords.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi64 (__m128i __A)
{
  /* Widen in two steps: halfwords -> words -> doublewords.  */
  __v4si __words = vec_unpackh ((__v8hi) __A);
  return (__m128i) vec_unpackh (__words);
}
#endif
    580  1.1.1.3  mrg 
    581  1.1.1.3  mrg #ifdef _ARCH_PWR8
    582  1.1.1.3  mrg extern __inline __m128i
    583  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    584  1.1.1.3  mrg _mm_cvtepi32_epi64 (__m128i __A)
    585  1.1.1.3  mrg {
    586  1.1.1.3  mrg   return (__m128i) vec_unpackh ((__v4si) __A);
    587  1.1.1.3  mrg }
    588  1.1.1.3  mrg #endif
    589  1.1.1.3  mrg 
    590  1.1.1.3  mrg extern __inline __m128i
    591  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    592  1.1.1.3  mrg _mm_cvtepu8_epi16 (__m128i __A)
    593  1.1.1.3  mrg {
    594  1.1.1.3  mrg   const __v16qu __zero = {0};
    595  1.1.1.3  mrg #ifdef __LITTLE_ENDIAN__
    596  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
    597  1.1.1.3  mrg #else /* __BIG_ENDIAN__.  */
    598  1.1.1.3  mrg   __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
    599  1.1.1.3  mrg #endif /* __BIG_ENDIAN__.  */
    600  1.1.1.3  mrg   return __A;
    601  1.1.1.3  mrg }
    602  1.1.1.3  mrg 
    603  1.1.1.3  mrg extern __inline __m128i
    604  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    605  1.1.1.3  mrg _mm_cvtepu8_epi32 (__m128i __A)
    606  1.1.1.3  mrg {
    607  1.1.1.3  mrg   const __v16qu __zero = {0};
    608  1.1.1.3  mrg #ifdef __LITTLE_ENDIAN__
    609  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
    610  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
    611  1.1.1.3  mrg #else /* __BIG_ENDIAN__.  */
    612  1.1.1.3  mrg   __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
    613  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
    614  1.1.1.3  mrg #endif /* __BIG_ENDIAN__.  */
    615  1.1.1.3  mrg   return __A;
    616  1.1.1.3  mrg }
    617  1.1.1.3  mrg 
    618  1.1.1.3  mrg extern __inline __m128i
    619  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    620  1.1.1.3  mrg _mm_cvtepu8_epi64 (__m128i __A)
    621  1.1.1.3  mrg {
    622  1.1.1.3  mrg   const __v16qu __zero = {0};
    623  1.1.1.3  mrg #ifdef __LITTLE_ENDIAN__
    624  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
    625  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
    626  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
    627  1.1.1.3  mrg #else /* __BIG_ENDIAN__.  */
    628  1.1.1.3  mrg   __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
    629  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
    630  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
    631  1.1.1.3  mrg #endif /* __BIG_ENDIAN__.  */
    632  1.1.1.3  mrg   return __A;
    633  1.1.1.3  mrg }
    634  1.1.1.3  mrg 
    635  1.1.1.3  mrg extern __inline __m128i
    636  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    637  1.1.1.3  mrg _mm_cvtepu16_epi32 (__m128i __A)
    638  1.1.1.3  mrg {
    639  1.1.1.3  mrg   const __v8hu __zero = {0};
    640  1.1.1.3  mrg #ifdef __LITTLE_ENDIAN__
    641  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
    642  1.1.1.3  mrg #else /* __BIG_ENDIAN__.  */
    643  1.1.1.3  mrg   __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
    644  1.1.1.3  mrg #endif /* __BIG_ENDIAN__.  */
    645  1.1.1.3  mrg   return __A;
    646  1.1.1.3  mrg }
    647  1.1.1.3  mrg 
    648  1.1.1.3  mrg extern __inline __m128i
    649  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    650  1.1.1.3  mrg _mm_cvtepu16_epi64 (__m128i __A)
    651  1.1.1.3  mrg {
    652  1.1.1.3  mrg   const __v8hu __zero = {0};
    653  1.1.1.3  mrg #ifdef __LITTLE_ENDIAN__
    654  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
    655  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
    656  1.1.1.3  mrg #else /* __BIG_ENDIAN__.  */
    657  1.1.1.3  mrg   __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
    658  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
    659  1.1.1.3  mrg #endif /* __BIG_ENDIAN__.  */
    660  1.1.1.3  mrg   return __A;
    661  1.1.1.3  mrg }
    662  1.1.1.3  mrg 
    663  1.1.1.3  mrg extern __inline __m128i
    664  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    665  1.1.1.3  mrg _mm_cvtepu32_epi64 (__m128i __A)
    666  1.1.1.3  mrg {
    667  1.1.1.3  mrg   const __v4su __zero = {0};
    668  1.1.1.3  mrg #ifdef __LITTLE_ENDIAN__
    669  1.1.1.3  mrg   __A = (__m128i) vec_mergeh ((__v4su) __A, __zero);
    670  1.1.1.3  mrg #else /* __BIG_ENDIAN__.  */
    671  1.1.1.3  mrg   __A = (__m128i) vec_mergeh (__zero, (__v4su) __A);
    672  1.1.1.3  mrg #endif /* __BIG_ENDIAN__.  */
    673  1.1.1.3  mrg   return __A;
    674  1.1.1.3  mrg }
    675  1.1.1.3  mrg 
    676  1.1.1.3  mrg /* Return horizontal packed word minimum and its index in bits [15:0]
    677  1.1.1.3  mrg    and bits [18:16] respectively.  */
    678  1.1.1.3  mrg extern __inline __m128i
    679  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    680  1.1.1.3  mrg _mm_minpos_epu16 (__m128i __A)
    681  1.1.1.3  mrg {
    682  1.1.1.3  mrg   union __u
    683  1.1.1.3  mrg     {
    684  1.1.1.3  mrg       __m128i __m;
    685  1.1.1.3  mrg       __v8hu __uh;
    686  1.1.1.3  mrg     };
    687  1.1.1.3  mrg   union __u __u = { .__m = __A }, __r = { .__m = {0} };
    688  1.1.1.3  mrg   unsigned short __ridx = 0;
    689  1.1.1.3  mrg   unsigned short __rmin = __u.__uh[__ridx];
    690  1.1.1.3  mrg   unsigned long __i;
    691  1.1.1.3  mrg   for (__i = 1; __i < 8; __i++)
    692  1.1.1.3  mrg     {
    693  1.1.1.3  mrg       if (__u.__uh[__i] < __rmin)
    694  1.1.1.3  mrg 	{
    695  1.1.1.3  mrg 	  __rmin = __u.__uh[__i];
    696  1.1.1.3  mrg 	  __ridx = __i;
    697  1.1.1.3  mrg 	}
    698  1.1.1.3  mrg     }
    699  1.1.1.3  mrg   __r.__uh[0] = __rmin;
    700  1.1.1.3  mrg   __r.__uh[1] = __ridx;
    701  1.1.1.3  mrg   return __r.__m;
    702  1.1.1.3  mrg }
    703  1.1.1.3  mrg 
    704  1.1.1.3  mrg extern __inline __m128i
    705  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    706  1.1.1.3  mrg _mm_packus_epi32 (__m128i __X, __m128i __Y)
    707  1.1.1.3  mrg {
    708  1.1.1.3  mrg   return (__m128i) vec_packsu ((__v4si) __X, (__v4si) __Y);
    709  1.1.1.3  mrg }
    710  1.1.1.3  mrg 
    711  1.1.1.3  mrg #ifdef _ARCH_PWR8
    712  1.1.1.3  mrg extern __inline __m128i
    713  1.1.1.3  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    714  1.1.1.3  mrg _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
    715  1.1.1.3  mrg {
    716  1.1.1.3  mrg   return (__m128i) vec_cmpgt ((__v2di) __X, (__v2di) __Y);
    717  1.1.1.3  mrg }
    718  1.1.1.3  mrg #endif
    719  1.1.1.3  mrg 
    720      1.1  mrg #endif
    721