Home | History | Annotate | Line # | Download | only in rs6000
      1  1.1.1.3  mrg /* Copyright (C) 2003-2022 Free Software Foundation, Inc.
      2      1.1  mrg 
      3      1.1  mrg    This file is part of GCC.
      4      1.1  mrg 
      5      1.1  mrg    GCC is free software; you can redistribute it and/or modify
      6      1.1  mrg    it under the terms of the GNU General Public License as published by
      7      1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      8      1.1  mrg    any later version.
      9      1.1  mrg 
     10      1.1  mrg    GCC is distributed in the hope that it will be useful,
     11      1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12      1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13      1.1  mrg    GNU General Public License for more details.
     14      1.1  mrg 
     15      1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     16      1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     17      1.1  mrg    3.1, as published by the Free Software Foundation.
     18      1.1  mrg 
     19      1.1  mrg    You should have received a copy of the GNU General Public License and
     20      1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     21      1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22      1.1  mrg    <http://www.gnu.org/licenses/>.  */
     23      1.1  mrg 
     24      1.1  mrg /* Implemented from the specification included in the Intel C++ Compiler
     25      1.1  mrg    User Guide and Reference, version 9.0.  */
     26      1.1  mrg 
     27      1.1  mrg #ifndef NO_WARN_X86_INTRINSICS
     28      1.1  mrg /* This header is distributed to simplify porting x86_64 code that
     29      1.1  mrg    makes explicit use of Intel intrinsics to powerpc64le.
     30      1.1  mrg    It is the user's responsibility to determine if the results are
     31      1.1  mrg    acceptable and make additional changes as necessary.
     32      1.1  mrg    Note that much code that uses Intel intrinsics can be rewritten in
     33      1.1  mrg    standard C or GNU C extensions, which are more portable and better
     34      1.1  mrg    optimized across multiple targets.
     35      1.1  mrg 
     36      1.1  mrg    In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
     37      1.1  mrg    is a good match for most SIMD operations.  However the Horizontal
     38      1.1  mrg    add/sub requires the data pairs be permuted into a separate
     39      1.1  mrg    registers with vertical even/odd alignment for the operation.
     40      1.1  mrg    And the addsub operation requires the sign of only the even numbered
     41      1.1  mrg    elements be flipped (xored with -0.0).
     42      1.1  mrg    For larger blocks of code using these intrinsic implementations,
   the compiler should be able to schedule instructions to avoid
     44      1.1  mrg    additional latency.
     45      1.1  mrg 
   In the specific case of the monitor and mwait instructions there is
     47      1.1  mrg    no direct equivalent in the PowerISA at this time.  So those
     48      1.1  mrg    intrinsics are not implemented.  */
     49      1.1  mrg #error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this warning."
     50      1.1  mrg #endif
     51      1.1  mrg 
     52      1.1  mrg #ifndef _PMMINTRIN_H_INCLUDED
     53      1.1  mrg #define _PMMINTRIN_H_INCLUDED
     54      1.1  mrg 
/* We need definitions from the SSE2 and SSE header files.  */
     56      1.1  mrg #include <emmintrin.h>
     57      1.1  mrg 
     58      1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     59      1.1  mrg _mm_addsub_ps (__m128 __X, __m128 __Y)
     60      1.1  mrg {
     61  1.1.1.2  mrg   const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0};
     62  1.1.1.2  mrg   __v4sf __even_neg_Y = vec_xor(__Y, __even_n0);
     63  1.1.1.2  mrg   return (__m128) vec_add (__X, __even_neg_Y);
     64      1.1  mrg }
     65      1.1  mrg 
     66      1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     67      1.1  mrg _mm_addsub_pd (__m128d __X, __m128d __Y)
     68      1.1  mrg {
     69  1.1.1.2  mrg   const __v2df __even_n0 = {-0.0, 0.0};
     70  1.1.1.2  mrg   __v2df __even_neg_Y = vec_xor(__Y, __even_n0);
     71  1.1.1.2  mrg   return (__m128d) vec_add (__X, __even_neg_Y);
     72      1.1  mrg }
     73      1.1  mrg 
     74      1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     75      1.1  mrg _mm_hadd_ps (__m128 __X, __m128 __Y)
     76      1.1  mrg {
     77  1.1.1.2  mrg   __vector unsigned char __xform2 = {
     78      1.1  mrg       0x00, 0x01, 0x02, 0x03,
     79      1.1  mrg       0x08, 0x09, 0x0A, 0x0B,
     80      1.1  mrg       0x10, 0x11, 0x12, 0x13,
     81      1.1  mrg       0x18, 0x19, 0x1A, 0x1B
     82      1.1  mrg     };
     83  1.1.1.2  mrg   __vector unsigned char __xform1 = {
     84      1.1  mrg       0x04, 0x05, 0x06, 0x07,
     85      1.1  mrg       0x0C, 0x0D, 0x0E, 0x0F,
     86      1.1  mrg       0x14, 0x15, 0x16, 0x17,
     87      1.1  mrg       0x1C, 0x1D, 0x1E, 0x1F
     88      1.1  mrg     };
     89  1.1.1.2  mrg   return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2),
     90  1.1.1.2  mrg 			   vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1));
     91      1.1  mrg }
     92      1.1  mrg 
     93      1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     94      1.1  mrg _mm_hsub_ps (__m128 __X, __m128 __Y)
     95      1.1  mrg {
     96  1.1.1.2  mrg   __vector unsigned char __xform2 = {
     97      1.1  mrg       0x00, 0x01, 0x02, 0x03,
     98      1.1  mrg       0x08, 0x09, 0x0A, 0x0B,
     99      1.1  mrg       0x10, 0x11, 0x12, 0x13,
    100      1.1  mrg       0x18, 0x19, 0x1A, 0x1B
    101      1.1  mrg     };
    102  1.1.1.2  mrg   __vector unsigned char __xform1 = {
    103      1.1  mrg       0x04, 0x05, 0x06, 0x07,
    104      1.1  mrg       0x0C, 0x0D, 0x0E, 0x0F,
    105      1.1  mrg       0x14, 0x15, 0x16, 0x17,
    106      1.1  mrg       0x1C, 0x1D, 0x1E, 0x1F
    107      1.1  mrg     };
    108  1.1.1.2  mrg   return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2),
    109  1.1.1.2  mrg 			   vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1));
    110      1.1  mrg }
    111      1.1  mrg 
    112      1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    113      1.1  mrg _mm_hadd_pd (__m128d __X, __m128d __Y)
    114      1.1  mrg {
    115      1.1  mrg   return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df)__Y),
    116      1.1  mrg 				  vec_mergel ((__v2df) __X, (__v2df)__Y));
    117      1.1  mrg }
    118      1.1  mrg 
    119      1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    120      1.1  mrg _mm_hsub_pd (__m128d __X, __m128d __Y)
    121      1.1  mrg {
    122      1.1  mrg   return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df)__Y),
    123      1.1  mrg 			    vec_mergel ((__v2df) __X, (__v2df)__Y));
    124      1.1  mrg }
    125      1.1  mrg 
    126  1.1.1.3  mrg #ifdef _ARCH_PWR8
    127      1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    128      1.1  mrg _mm_movehdup_ps (__m128 __X)
    129      1.1  mrg {
    130      1.1  mrg   return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X);
    131      1.1  mrg }
    132  1.1.1.3  mrg #endif
    133      1.1  mrg 
    134  1.1.1.3  mrg #ifdef _ARCH_PWR8
    135      1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    136      1.1  mrg _mm_moveldup_ps (__m128 __X)
    137      1.1  mrg {
    138      1.1  mrg   return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X);
    139      1.1  mrg }
    140  1.1.1.3  mrg #endif
    141      1.1  mrg 
    142      1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    143      1.1  mrg _mm_loaddup_pd (double const *__P)
    144      1.1  mrg {
    145      1.1  mrg   return (__m128d) vec_splats (*__P);
    146      1.1  mrg }
    147      1.1  mrg 
    148      1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    149      1.1  mrg _mm_movedup_pd (__m128d __X)
    150      1.1  mrg {
    151      1.1  mrg   return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
    152      1.1  mrg }
    153      1.1  mrg 
    154      1.1  mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    155      1.1  mrg _mm_lddqu_si128 (__m128i const *__P)
    156      1.1  mrg {
    157      1.1  mrg   return (__m128i) (vec_vsx_ld(0, (signed int const *)__P));
    158      1.1  mrg }
    159      1.1  mrg 
/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_mwait.  */
    161      1.1  mrg 
    162      1.1  mrg #endif /* _PMMINTRIN_H_INCLUDED */
    163