Home | History | Annotate | Line # | Download | only in i386
avx512erintrin.h revision 1.1.1.1.4.2
      1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _IMMINTRIN_H_INCLUDED
     25 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef _AVX512ERINTRIN_H_INCLUDED
     29 #define _AVX512ERINTRIN_H_INCLUDED
     30 
     31 #ifndef __AVX512ER__
     32 #pragma GCC push_options
     33 #pragma GCC target("avx512er")
     34 #define __DISABLE_AVX512ER__
     35 #endif /* __AVX512ER__ */
     36 
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));  /* 8 x double (512 bits).  */
typedef float __v16sf __attribute__ ((__vector_size__ (64)));  /* 16 x float (512 bits).  */

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write-mask types: one bit per vector lane.  */
typedef unsigned char  __mmask8;   /* Masks for 8-lane (double) operations.  */
typedef unsigned short __mmask16;  /* Masks for 16-lane (float) operations.  */
     48 
     49 #ifdef __OPTIMIZE__
     50 extern __inline __m512d
     51 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     52 _mm512_exp2a23_round_pd (__m512d __A, int __R)
     53 {
     54   __m512d __W;
     55   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
     56 					       (__v8df) __W,
     57 					       (__mmask8) -1, __R);
     58 }
     59 
     60 extern __inline __m512d
     61 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     62 _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
     63 {
     64   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
     65 					       (__v8df) __W,
     66 					       (__mmask8) __U, __R);
     67 }
     68 
     69 extern __inline __m512d
     70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     71 _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
     72 {
     73   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
     74 					       (__v8df) _mm512_setzero_pd (),
     75 					       (__mmask8) __U, __R);
     76 }
     77 
     78 extern __inline __m512
     79 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     80 _mm512_exp2a23_round_ps (__m512 __A, int __R)
     81 {
     82   __m512 __W;
     83   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
     84 					      (__v16sf) __W,
     85 					      (__mmask16) -1, __R);
     86 }
     87 
     88 extern __inline __m512
     89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     90 _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
     91 {
     92   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
     93 					      (__v16sf) __W,
     94 					      (__mmask16) __U, __R);
     95 }
     96 
     97 extern __inline __m512
     98 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     99 _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
    100 {
    101   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
    102 					      (__v16sf) _mm512_setzero_ps (),
    103 					      (__mmask16) __U, __R);
    104 }
    105 
    106 extern __inline __m512d
    107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    108 _mm512_rcp28_round_pd (__m512d __A, int __R)
    109 {
    110   __m512d __W;
    111   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
    112 						(__v8df) __W,
    113 						(__mmask8) -1, __R);
    114 }
    115 
    116 extern __inline __m512d
    117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    118 _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
    119 {
    120   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
    121 						(__v8df) __W,
    122 						(__mmask8) __U, __R);
    123 }
    124 
    125 extern __inline __m512d
    126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    127 _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
    128 {
    129   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
    130 						(__v8df) _mm512_setzero_pd (),
    131 						(__mmask8) __U, __R);
    132 }
    133 
    134 extern __inline __m512
    135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    136 _mm512_rcp28_round_ps (__m512 __A, int __R)
    137 {
    138   __m512 __W;
    139   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
    140 					       (__v16sf) __W,
    141 					       (__mmask16) -1, __R);
    142 }
    143 
    144 extern __inline __m512
    145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    146 _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
    147 {
    148   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
    149 					       (__v16sf) __W,
    150 					       (__mmask16) __U, __R);
    151 }
    152 
    153 extern __inline __m512
    154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    155 _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
    156 {
    157   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
    158 					       (__v16sf) _mm512_setzero_ps (),
    159 					       (__mmask16) __U, __R);
    160 }
    161 
    162 extern __inline __m128d
    163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    164 _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
    165 {
    166   return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
    167 						 (__v2df) __A,
    168 						 __R);
    169 }
    170 
    171 extern __inline __m128
    172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    173 _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
    174 {
    175   return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
    176 						(__v4sf) __A,
    177 						__R);
    178 }
    179 
    180 extern __inline __m512d
    181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    182 _mm512_rsqrt28_round_pd (__m512d __A, int __R)
    183 {
    184   __m512d __W;
    185   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
    186 						  (__v8df) __W,
    187 						  (__mmask8) -1, __R);
    188 }
    189 
    190 extern __inline __m512d
    191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    192 _mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
    193 {
    194   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
    195 						  (__v8df) __W,
    196 						  (__mmask8) __U, __R);
    197 }
    198 
    199 extern __inline __m512d
    200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    201 _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
    202 {
    203   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
    204 						  (__v8df) _mm512_setzero_pd (),
    205 						  (__mmask8) __U, __R);
    206 }
    207 
    208 extern __inline __m512
    209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    210 _mm512_rsqrt28_round_ps (__m512 __A, int __R)
    211 {
    212   __m512 __W;
    213   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
    214 						 (__v16sf) __W,
    215 						 (__mmask16) -1, __R);
    216 }
    217 
    218 extern __inline __m512
    219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    220 _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
    221 {
    222   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
    223 						 (__v16sf) __W,
    224 						 (__mmask16) __U, __R);
    225 }
    226 
    227 extern __inline __m512
    228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    229 _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
    230 {
    231   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
    232 						 (__v16sf) _mm512_setzero_ps (),
    233 						 (__mmask16) __U, __R);
    234 }
    235 
    236 extern __inline __m128d
    237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    238 _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
    239 {
    240   return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
    241 						   (__v2df) __A,
    242 						   __R);
    243 }
    244 
    245 extern __inline __m128
    246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    247 _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
    248 {
    249   return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
    250 						  (__v4sf) __A,
    251 						  __R);
    252 }
    253 
    254 #else
/* Macro forms used when __OPTIMIZE__ is not defined: the rounding-mode
   argument must be an immediate, so these expand directly to the
   builtins.  */
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

/* The scalar builtins take the operand whose low element is to be
   approximated FIRST.  The inline (__OPTIMIZE__) definitions and the
   _mm_rcp28_sd/_mm_rsqrt28_sd macros below both pass (B, A); these
   round macros previously expanded to (A, B, R), silently swapping
   the operands when compiled without optimization.  Pass (B, A, R)
   so both builds agree.  */
#define _mm_rcp28_round_sd(A, B, R)	\
    __builtin_ia32_rcp28sd_round(B, A, R)

#define _mm_rcp28_round_ss(A, B, R)	\
    __builtin_ia32_rcp28ss_round(B, A, R)

#define _mm_rsqrt28_round_sd(A, B, R)	\
    __builtin_ia32_rsqrt28sd_round(B, A, R)

#define _mm_rsqrt28_round_ss(A, B, R)	\
    __builtin_ia32_rsqrt28ss_round(B, A, R)
    320 
    321 #endif
    322 
/* Convenience forms of the intrinsics above that use the current
   rounding direction (_MM_FROUND_CUR_DIRECTION) instead of taking an
   explicit rounding-mode argument.  */
#define _mm512_exp2a23_pd(A)                    \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A)   \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A)     \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A)                    \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A)   \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A)     \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A)                    \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A)   \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A)     \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A)                    \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A)   \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A)     \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A)                    \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A)   \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A)     \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A)                    \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A)   \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A)     \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Scalar forms: the builtin takes the operand to be approximated (B)
   first; the other operand (A) supplies the pass-through upper
   elements.  */
#define _mm_rcp28_sd(A, B)	\
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B)	\
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B)	\
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B)	\
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
    388 
    389 #ifdef __DISABLE_AVX512ER__
    390 #undef __DISABLE_AVX512ER__
    391 #pragma GCC pop_options
    392 #endif /* __DISABLE_AVX512ER__ */
    393 
    394 #endif /* _AVX512ERINTRIN_H_INCLUDED */
    395