Home | History | Annotate | Line # | Download | only in rs6000
tmmintrin.h revision 1.1.1.1
      1 /* Copyright (C) 2003-2019 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 /* Implemented from the specification included in the Intel C++ Compiler
     25    User Guide and Reference, version 9.0.  */
     26 
     27 #ifndef NO_WARN_X86_INTRINSICS
     28 /* This header is distributed to simplify porting x86_64 code that
     29    makes explicit use of Intel intrinsics to powerpc64le.
     30    It is the user's responsibility to determine if the results are
     31    acceptable and make additional changes as necessary.
     32    Note that much code that uses Intel intrinsics can be rewritten in
     33    standard C or GNU C extensions, which are more portable and better
     34    optimized across multiple targets.  */
     35 #endif
     36 
     37 #ifndef TMMINTRIN_H_
     38 #define TMMINTRIN_H_
     39 
     40 #include <altivec.h>
     41 #include <assert.h>
     42 
     43 /* We need definitions from the SSE header files.  */
     44 #include <pmmintrin.h>
     45 
     46 extern __inline __m128i
     47 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     48 _mm_abs_epi16 (__m128i __A)
     49 {
     50   return (__m128i) vec_abs ((__v8hi) __A);
     51 }
     52 
     53 extern __inline __m128i
     54 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     55 _mm_abs_epi32 (__m128i __A)
     56 {
     57   return (__m128i) vec_abs ((__v4si) __A);
     58 }
     59 
     60 extern __inline __m128i
     61 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     62 _mm_abs_epi8 (__m128i __A)
     63 {
     64   return (__m128i) vec_abs ((__v16qi) __A);
     65 }
     66 
     67 extern __inline __m64
     68 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     69 _mm_abs_pi16 (__m64 __A)
     70 {
     71   __v8hi __B = (__v8hi) (__v2du) { __A, __A };
     72   return (__m64) ((__v2du) vec_abs (__B))[0];
     73 }
     74 
     75 extern __inline __m64
     76 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     77 _mm_abs_pi32 (__m64 __A)
     78 {
     79   __v4si __B = (__v4si) (__v2du) { __A, __A };
     80   return (__m64) ((__v2du) vec_abs (__B))[0];
     81 }
     82 
     83 extern __inline __m64
     84 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     85 _mm_abs_pi8 (__m64 __A)
     86 {
     87   __v16qi __B = (__v16qi) (__v2du) { __A, __A };
     88   return (__m64) ((__v2du) vec_abs (__B))[0];
     89 }
     90 
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count)
{
  /* SSSE3 PALIGNR: concatenate __A:__B (with __A in the high 16
     bytes), shift the 32-byte value right by __count bytes, and
     return the low 16 bytes.  */
  if (__builtin_constant_p (__count) && __count < 16)
    {
      /* vec_sld requires a compile-time-constant shift and operates
	 in big-endian byte order, so on little-endian the inputs and
	 the result are byte-reversed around it.  */
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
      __B = (__m128i) vec_reve ((__v16qu) __B);
#endif
      __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count);
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
#endif
      return __A;
    }

  /* A zero shift selects __B unchanged.  (Also keeps the general
     path below from computing a 128-bit shift amount.)  */
  if (__count == 0)
    return __B;

  if (__count >= 16)
    {
      if (__count >= 32)
	{
	  /* The whole 32-byte concatenation is shifted out.  */
	  const __v16qu zero = { 0 };
	  return (__m128i) zero;
	}
      else
	{
	  /* Only bytes of __A remain; shift them down by
	     (__count - 16) bytes.  vec_sro/vec_slo take the shift
	     amount in bits, splatted across the control vector.  */
	  const __v16qu __shift =
	    vec_splats ((unsigned char) ((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
	  return (__m128i) vec_sro ((__v16qu) __A, __shift);
#else
	  return (__m128i) vec_slo ((__v16qu) __A, __shift);
#endif
	}
    }
  else
    {
      /* 0 < __count < 16: combine the low bytes of __A with the
	 high bytes of __B, shifting each into place and OR-ing.  */
      const __v16qu __shiftA =
	vec_splats ((unsigned char) ((16 - __count) * 8));
      const __v16qu __shiftB = vec_splats ((unsigned char) (__count * 8));
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB);
#else
      __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB);
#endif
      return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B);
    }
}
    144 
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count)
{
  /* 64-bit PALIGNR: shift the 16-byte __A:__B concatenation right by
     __count bytes and return the low 8 bytes.  */
  if (__count < 16)
    {
      /* Pack B into the low doubleword and A into the high one, then
	 shift the whole quadword toward B by __count * 8 bits.
	 vec_sro/vec_slo read the bit count from one end of the
	 control vector, hence the endian-specific initializers.  */
      __v2du __C = { __B, __A };
#ifdef __LITTLE_ENDIAN__
      const __v4su __shift = { __count << 3, 0, 0, 0 };
      __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift);
#else
      const __v4su __shift = { 0, 0, 0, __count << 3 };
      __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift);
#endif
      return (__m64) __C[0];
    }
  else
    {
      /* Shifting by 16 bytes or more leaves nothing.  */
      const __m64 __zero = { 0 };
      return __zero;
    }
}
    167 
    168 extern __inline __m128i
    169 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    170 _mm_hadd_epi16 (__m128i __A, __m128i __B)
    171 {
    172   const __v16qu __P =
    173     {  0,  1,  4,  5,  8,  9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
    174   const __v16qu __Q =
    175     {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
    176   __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
    177   __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
    178   return (__m128i) vec_add (__C, __D);
    179 }
    180 
    181 extern __inline __m128i
    182 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    183 _mm_hadd_epi32 (__m128i __A, __m128i __B)
    184 {
    185   const __v16qu __P =
    186     {  0,  1,  2,  3,  8,  9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
    187   const __v16qu __Q =
    188     {  4,  5,  6,  7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
    189   __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
    190   __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
    191   return (__m128i) vec_add (__C, __D);
    192 }
    193 
    194 extern __inline __m64
    195 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    196 _mm_hadd_pi16 (__m64 __A, __m64 __B)
    197 {
    198   __v8hi __C = (__v8hi) (__v2du) { __A, __B };
    199   const __v16qu __P =
    200     {  0,  1,  4,  5,  8,  9, 12, 13,  0,  1,  4,  5,  8,  9, 12, 13 };
    201   const __v16qu __Q =
    202     {  2,  3,  6,  7, 10, 11, 14, 15,  2,  3,  6,  7, 10, 11, 14, 15 };
    203   __v8hi __D = vec_perm (__C, __C, __Q);
    204   __C = vec_perm (__C, __C, __P);
    205   __C = vec_add (__C, __D);
    206   return (__m64) ((__v2du) __C)[1];
    207 }
    208 
    209 extern __inline __m64
    210 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    211 _mm_hadd_pi32 (__m64 __A, __m64 __B)
    212 {
    213   __v4si __C = (__v4si) (__v2du) { __A, __B };
    214   const __v16qu __P =
    215     {  0,  1,  2,  3,  8,  9, 10, 11,  0,  1,  2,  3,  8,  9, 10, 11 };
    216   const __v16qu __Q =
    217     {  4,  5,  6,  7, 12, 13, 14, 15,  4,  5,  6,  7, 12, 13, 14, 15 };
    218   __v4si __D = vec_perm (__C, __C, __Q);
    219   __C = vec_perm (__C, __C, __P);
    220   __C = vec_add (__C, __D);
    221   return (__m64) ((__v2du) __C)[1];
    222 }
    223 
    224 extern __inline __m128i
    225 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    226 _mm_hadds_epi16 (__m128i __A, __m128i __B)
    227 {
    228   __v4si __C = { 0 }, __D = { 0 };
    229   __C = vec_sum4s ((__v8hi) __A, __C);
    230   __D = vec_sum4s ((__v8hi) __B, __D);
    231   __C = (__v4si) vec_packs (__C, __D);
    232   return (__m128i) __C;
    233 }
    234 
    235 extern __inline __m64
    236 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    237 _mm_hadds_pi16 (__m64 __A, __m64 __B)
    238 {
    239   const __v4si __zero = { 0 };
    240   __v8hi __C = (__v8hi) (__v2du) { __A, __B };
    241   __v4si __D = vec_sum4s (__C, __zero);
    242   __C = vec_packs (__D, __D);
    243   return (__m64) ((__v2du) __C)[1];
    244 }
    245 
    246 extern __inline __m128i
    247 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    248 _mm_hsub_epi16 (__m128i __A, __m128i __B)
    249 {
    250   const __v16qu __P =
    251     {  0,  1,  4,  5,  8,  9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
    252   const __v16qu __Q =
    253     {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
    254   __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
    255   __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
    256   return (__m128i) vec_sub (__C, __D);
    257 }
    258 
    259 extern __inline __m128i
    260 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    261 _mm_hsub_epi32 (__m128i __A, __m128i __B)
    262 {
    263   const __v16qu __P =
    264     {  0,  1,  2,  3,  8,  9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
    265   const __v16qu __Q =
    266     {  4,  5,  6,  7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
    267   __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
    268   __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
    269   return (__m128i) vec_sub (__C, __D);
    270 }
    271 
    272 extern __inline __m64
    273 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    274 _mm_hsub_pi16 (__m64 __A, __m64 __B)
    275 {
    276   const __v16qu __P =
    277     {  0,  1,  4,  5,  8,  9, 12, 13,  0,  1,  4,  5,  8,  9, 12, 13 };
    278   const __v16qu __Q =
    279     {  2,  3,  6,  7, 10, 11, 14, 15,  2,  3,  6,  7, 10, 11, 14, 15 };
    280   __v8hi __C = (__v8hi) (__v2du) { __A, __B };
    281   __v8hi __D = vec_perm (__C, __C, __Q);
    282   __C = vec_perm (__C, __C, __P);
    283   __C = vec_sub (__C, __D);
    284   return (__m64) ((__v2du) __C)[1];
    285 }
    286 
    287 extern __inline __m64
    288 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    289 _mm_hsub_pi32 (__m64 __A, __m64 __B)
    290 {
    291   const __v16qu __P =
    292     {  0,  1,  2,  3,  8,  9, 10, 11,  0,  1,  2,  3,  8,  9, 10, 11 };
    293   const __v16qu __Q =
    294     {  4,  5,  6,  7, 12, 13, 14, 15,  4,  5,  6,  7, 12, 13, 14, 15 };
    295   __v4si __C = (__v4si) (__v2du) { __A, __B };
    296   __v4si __D = vec_perm (__C, __C, __Q);
    297   __C = vec_perm (__C, __C, __P);
    298   __C = vec_sub (__C, __D);
    299   return (__m64) ((__v2du) __C)[1];
    300 }
    301 
    302 extern __inline __m128i
    303 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    304 _mm_hsubs_epi16 (__m128i __A, __m128i __B)
    305 {
    306   const __v16qu __P =
    307     {  0,  1,  4,  5,  8,  9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
    308   const __v16qu __Q =
    309     {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
    310   __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
    311   __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
    312   return (__m128i) vec_subs (__C, __D);
    313 }
    314 
    315 extern __inline __m64
    316 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    317 _mm_hsubs_pi16 (__m64 __A, __m64 __B)
    318 {
    319   const __v16qu __P =
    320     {  0,  1,  4,  5,  8,  9, 12, 13,  0,  1,  4,  5,  8,  9, 12, 13 };
    321   const __v16qu __Q =
    322     {  2,  3,  6,  7, 10, 11, 14, 15,  2,  3,  6,  7, 10, 11, 14, 15 };
    323   __v8hi __C = (__v8hi) (__v2du) { __A, __B };
    324   __v8hi __D = vec_perm (__C, __C, __P);
    325   __v8hi __E = vec_perm (__C, __C, __Q);
    326   __C = vec_subs (__D, __E);
    327   return (__m64) ((__v2du) __C)[1];
    328 }
    329 
    330 extern __inline __m128i
    331 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    332 _mm_shuffle_epi8 (__m128i __A, __m128i __B)
    333 {
    334   const __v16qi __zero = { 0 };
    335   __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero);
    336   __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B);
    337   return (__m128i) vec_sel (__C, __zero, __select);
    338 }
    339 
    340 extern __inline __m64
    341 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    342 _mm_shuffle_pi8 (__m64 __A, __m64 __B)
    343 {
    344   const __v16qi __zero = { 0 };
    345   __v16qi __C = (__v16qi) (__v2du) { __A, __A };
    346   __v16qi __D = (__v16qi) (__v2du) { __B, __B };
    347   __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero);
    348   __C = vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D);
    349   __C = vec_sel (__C, __zero, __select);
    350   return (__m64) ((__v2du) (__C))[0];
    351 }
    352 
    353 extern __inline __m128i
    354 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    355 _mm_sign_epi8 (__m128i __A, __m128i __B)
    356 {
    357   const __v16qi __zero = { 0 };
    358   __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero);
    359   __v16qi __selectpos =
    360     (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero));
    361   __v16qi __conv = vec_add (__selectneg, __selectpos);
    362   return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
    363 }
    364 
    365 extern __inline __m128i
    366 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    367 _mm_sign_epi16 (__m128i __A, __m128i __B)
    368 {
    369   const __v8hi __zero = { 0 };
    370   __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero);
    371   __v8hi __selectpos =
    372     (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero));
    373   __v8hi __conv = vec_add (__selectneg, __selectpos);
    374   return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
    375 }
    376 
    377 extern __inline __m128i
    378 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    379 _mm_sign_epi32 (__m128i __A, __m128i __B)
    380 {
    381   const __v4si __zero = { 0 };
    382   __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero);
    383   __v4si __selectpos =
    384     (__v4si) vec_neg ((__v4si) vec_cmpgt ((__v4si) __B, __zero));
    385   __v4si __conv = vec_add (__selectneg, __selectpos);
    386   return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
    387 }
    388 
    389 extern __inline __m64
    390 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    391 _mm_sign_pi8 (__m64 __A, __m64 __B)
    392 {
    393   const __v16qi __zero = { 0 };
    394   __v16qi __C = (__v16qi) (__v2du) { __A, __A };
    395   __v16qi __D = (__v16qi) (__v2du) { __B, __B };
    396   __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
    397   return (__m64) ((__v2du) (__C))[0];
    398 }
    399 
    400 extern __inline __m64
    401 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    402 _mm_sign_pi16 (__m64 __A, __m64 __B)
    403 {
    404   const __v8hi __zero = { 0 };
    405   __v8hi __C = (__v8hi) (__v2du) { __A, __A };
    406   __v8hi __D = (__v8hi) (__v2du) { __B, __B };
    407   __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
    408   return (__m64) ((__v2du) (__C))[0];
    409 }
    410 
    411 extern __inline __m64
    412 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    413 _mm_sign_pi32 (__m64 __A, __m64 __B)
    414 {
    415   const __v4si __zero = { 0 };
    416   __v4si __C = (__v4si) (__v2du) { __A, __A };
    417   __v4si __D = (__v4si) (__v2du) { __B, __B };
    418   __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
    419   return (__m64) ((__v2du) (__C))[0];
    420 }
    421 
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __A, __m128i __B)
{
  /* SSSE3 PMADDUBSW: multiply unsigned bytes of __A by the
     corresponding signed bytes of __B, then add adjacent 16-bit
     products with signed saturation.  */
  /* Widen __A's bytes as unsigned: vec_unpack* sign-extends, so mask
     the extension back to the low 8 bits.  */
  __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned);
  __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned);
  /* Widen __B's bytes as signed.  */
  __v8hi __E = vec_unpackh ((__v16qi) __B);
  __v8hi __F = vec_unpackl ((__v16qi) __B);
  __C = vec_mul (__C, __E);
  __D = vec_mul (__D, __F);
  /* Gather the products of even and odd byte positions, then add
     them with signed saturation.  */
  const __v16qu __odds  =
    {  0,  1,  4,  5,  8,  9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __E = vec_perm (__C, __D, __odds);
  __F = vec_perm (__C, __D, __evens);
  return (__m128i) vec_adds (__E, __F);
}
    441 
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __A, __m64 __B)
{
  /* 64-bit PMADDUBSW: multiply unsigned bytes of __A by signed bytes
     of __B and saturating-add adjacent products, using one 128-bit
     lane.  */
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  /* Widen the bytes as unsigned: vec_unpackl sign-extends, so mask
     the extension back to the low 8 bits.  */
  __C = vec_unpackl ((__v16qi) __C);
  const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __C = vec_and (__C, __unsigned);
  /* Widen __B's bytes as signed and form the 16-bit products.  */
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __D = vec_unpackl ((__v16qi) __D);
  __D = vec_mul (__C, __D);
  /* Gather even/odd-position products and add with saturation; only
     the low doubleword of the result is returned.  */
  const __v16qu __odds  =
    {  0,  1,  4,  5,  8,  9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __C = vec_perm (__D, __D, __odds);
  __D = vec_perm (__D, __D, __evens);
  __C = vec_adds (__C, __D);
  return (__m64) ((__v2du) (__C))[0];
}
    462 
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __A, __m128i __B)
{
  /* SSSE3 PMULHRSW: per 16-bit element, compute the full 32-bit
     product, shift right 14, add 1, shift right 1 -- i.e. the high
     half of the product rounded to nearest.  */
  __v4si __C = vec_unpackh ((__v8hi) __A);
  __v4si __D = vec_unpackh ((__v8hi) __B);
  __C = vec_mul (__C, __D);
  __D = vec_unpackl ((__v8hi) __A);
  __v4si __E = vec_unpackl ((__v8hi) __B);
  __D = vec_mul (__D, __E);
  /* Drop all but bit 14 of the discarded low half ...  */
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  __D = vec_sr (__D, __shift);
  /* ... then round: add one and shift out the rounding bit.  */
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __D = vec_add (__D, __ones);
  __D = vec_sr (__D, (__v4su) __ones);
  /* Narrow both word vectors back to one halfword vector.  */
  return (__m128i) vec_pack (__C, __D);
}
    483 
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __A, __m64 __B)
{
  /* 64-bit PMULHRSW: rounded high half of the 16x16 product, for
     four halfwords.  Only one set of four 32-bit products is
     needed.  */
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __C = vec_unpackh ((__v8hi) __C);
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __D = vec_unpackh ((__v8hi) __D);
  __C = vec_mul (__C, __D);
  /* Shift right 14, add 1, shift right 1: round to nearest.  */
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  /* __D is only a filler second operand for vec_pack; the returned
     low doubleword comes entirely from __C.  */
  __v8hi __E = vec_pack (__C, __D);
  return (__m64) ((__v2du) (__E))[0];
}
    501 
    502 #endif
    503