/* Copyright (C) 2003-2022 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg /* Implemented from the specification included in the Intel C++ Compiler
25 1.1 mrg User Guide and Reference, version 9.0. */
26 1.1 mrg
27 1.1 mrg #ifndef NO_WARN_X86_INTRINSICS
28 1.1 mrg /* This header is distributed to simplify porting x86_64 code that
29 1.1 mrg makes explicit use of Intel intrinsics to powerpc64le.
30 1.1 mrg It is the user's responsibility to determine if the results are
31 1.1 mrg acceptable and make additional changes as necessary.
32 1.1 mrg Note that much code that uses Intel intrinsics can be rewritten in
33 1.1 mrg standard C or GNU C extensions, which are more portable and better
34 1.1 mrg optimized across multiple targets.
35 1.1 mrg
36 1.1 mrg In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
37 1.1 mrg is a good match for most SIMD operations. However the Horizontal
38 1.1 mrg add/sub requires the data pairs be permuted into a separate
39 1.1 mrg registers with vertical even/odd alignment for the operation.
40 1.1 mrg And the addsub operation requires the sign of only the even numbered
41 1.1 mrg elements be flipped (xored with -0.0).
42 1.1 mrg For larger blocks of code using these intrinsic implementations,
   the compiler should be able to schedule instructions to avoid
44 1.1 mrg additional latency.
45 1.1 mrg
46 1.1 mrg In the specific case of the monitor and mwait instructions there are
47 1.1 mrg no direct equivalent in the PowerISA at this time. So those
48 1.1 mrg intrinsics are not implemented. */
49 1.1 mrg #error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
50 1.1 mrg #endif
51 1.1 mrg
52 1.1 mrg #ifndef _PMMINTRIN_H_INCLUDED
53 1.1 mrg #define _PMMINTRIN_H_INCLUDED
54 1.1 mrg
/* We need definitions from the SSE2 and SSE header files.  */
56 1.1 mrg #include <emmintrin.h>
57 1.1 mrg
58 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
59 1.1 mrg _mm_addsub_ps (__m128 __X, __m128 __Y)
60 1.1 mrg {
61 1.1.1.2 mrg const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0};
62 1.1.1.2 mrg __v4sf __even_neg_Y = vec_xor(__Y, __even_n0);
63 1.1.1.2 mrg return (__m128) vec_add (__X, __even_neg_Y);
64 1.1 mrg }
65 1.1 mrg
66 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
67 1.1 mrg _mm_addsub_pd (__m128d __X, __m128d __Y)
68 1.1 mrg {
69 1.1.1.2 mrg const __v2df __even_n0 = {-0.0, 0.0};
70 1.1.1.2 mrg __v2df __even_neg_Y = vec_xor(__Y, __even_n0);
71 1.1.1.2 mrg return (__m128d) vec_add (__X, __even_neg_Y);
72 1.1 mrg }
73 1.1 mrg
74 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
75 1.1 mrg _mm_hadd_ps (__m128 __X, __m128 __Y)
76 1.1 mrg {
77 1.1.1.2 mrg __vector unsigned char __xform2 = {
78 1.1 mrg 0x00, 0x01, 0x02, 0x03,
79 1.1 mrg 0x08, 0x09, 0x0A, 0x0B,
80 1.1 mrg 0x10, 0x11, 0x12, 0x13,
81 1.1 mrg 0x18, 0x19, 0x1A, 0x1B
82 1.1 mrg };
83 1.1.1.2 mrg __vector unsigned char __xform1 = {
84 1.1 mrg 0x04, 0x05, 0x06, 0x07,
85 1.1 mrg 0x0C, 0x0D, 0x0E, 0x0F,
86 1.1 mrg 0x14, 0x15, 0x16, 0x17,
87 1.1 mrg 0x1C, 0x1D, 0x1E, 0x1F
88 1.1 mrg };
89 1.1.1.2 mrg return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2),
90 1.1.1.2 mrg vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1));
91 1.1 mrg }
92 1.1 mrg
93 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
94 1.1 mrg _mm_hsub_ps (__m128 __X, __m128 __Y)
95 1.1 mrg {
96 1.1.1.2 mrg __vector unsigned char __xform2 = {
97 1.1 mrg 0x00, 0x01, 0x02, 0x03,
98 1.1 mrg 0x08, 0x09, 0x0A, 0x0B,
99 1.1 mrg 0x10, 0x11, 0x12, 0x13,
100 1.1 mrg 0x18, 0x19, 0x1A, 0x1B
101 1.1 mrg };
102 1.1.1.2 mrg __vector unsigned char __xform1 = {
103 1.1 mrg 0x04, 0x05, 0x06, 0x07,
104 1.1 mrg 0x0C, 0x0D, 0x0E, 0x0F,
105 1.1 mrg 0x14, 0x15, 0x16, 0x17,
106 1.1 mrg 0x1C, 0x1D, 0x1E, 0x1F
107 1.1 mrg };
108 1.1.1.2 mrg return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2),
109 1.1.1.2 mrg vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1));
110 1.1 mrg }
111 1.1 mrg
112 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
113 1.1 mrg _mm_hadd_pd (__m128d __X, __m128d __Y)
114 1.1 mrg {
115 1.1 mrg return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df)__Y),
116 1.1 mrg vec_mergel ((__v2df) __X, (__v2df)__Y));
117 1.1 mrg }
118 1.1 mrg
119 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120 1.1 mrg _mm_hsub_pd (__m128d __X, __m128d __Y)
121 1.1 mrg {
122 1.1 mrg return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df)__Y),
123 1.1 mrg vec_mergel ((__v2df) __X, (__v2df)__Y));
124 1.1 mrg }
125 1.1 mrg
#ifdef _ARCH_PWR8
/* Duplicate the odd-indexed float elements:
   result = { __X[1], __X[1], __X[3], __X[3] }.
   vec_mergeo interleaves the odd elements of its two operands; passing
   __X twice duplicates them.  Guarded by _ARCH_PWR8 since vec_mergeo
   requires POWER8.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
  __v4su __words = (__v4su) __X;
  return (__m128) vec_mergeo (__words, __words);
}
#endif
133 1.1 mrg
#ifdef _ARCH_PWR8
/* Duplicate the even-indexed float elements:
   result = { __X[0], __X[0], __X[2], __X[2] }.
   vec_mergee interleaves the even elements of its two operands; passing
   __X twice duplicates them.  Guarded by _ARCH_PWR8 since vec_mergee
   requires POWER8.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
  __v4su __words = (__v4su) __X;
  return (__m128) vec_mergee (__words, __words);
}
#endif
141 1.1 mrg
142 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
143 1.1 mrg _mm_loaddup_pd (double const *__P)
144 1.1 mrg {
145 1.1 mrg return (__m128d) vec_splats (*__P);
146 1.1 mrg }
147 1.1 mrg
148 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
149 1.1 mrg _mm_movedup_pd (__m128d __X)
150 1.1 mrg {
151 1.1 mrg return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
152 1.1 mrg }
153 1.1 mrg
154 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
155 1.1 mrg _mm_lddqu_si128 (__m128i const *__P)
156 1.1 mrg {
157 1.1 mrg return (__m128i) (vec_vsx_ld(0, (signed int const *)__P));
158 1.1 mrg }
159 1.1 mrg
/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_mwait.  */
161 1.1 mrg
162 1.1 mrg #endif /* _PMMINTRIN_H_INCLUDED */
163