/* avx512bf16vlintrin.h revision 1.1.1.2 */
      1 /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _IMMINTRIN_H_INCLUDED
     25 #error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef _AVX512BF16VLINTRIN_H_INCLUDED
     29 #define _AVX512BF16VLINTRIN_H_INCLUDED
     30 
     31 #if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
     32 #pragma GCC push_options
     33 #pragma GCC target("avx512bf16,avx512vl")
     34 #define __DISABLE_AVX512BF16VL__
#endif /* !__AVX512VL__ || !__AVX512BF16__ */
     36 
/* Internal data types for implementing the intrinsics.  */
typedef short __v16bh __attribute__ ((__vector_size__ (32)));	/* 16 bf16 lanes.  */
typedef short __v8bh __attribute__ ((__vector_size__ (16)));	/* 8 bf16 lanes.  */

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));

/* A scalar bfloat16 value, carried as its raw 16-bit pattern.  */
typedef unsigned short __bfloat16;
     47 /* vcvtne2ps2bf16 */
     48 
/* Convert the 8 floats in __A and the 8 floats in __B to 16 bf16 values
   (vcvtne2ps2bf16); __B's results fill the lower half of the destination,
   __A's the upper half.  */
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
{
  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
}
     55 
/* Merge-masked vcvtne2ps2bf16: convert __C/__D to bf16; destination
   elements whose bit in mask __B is clear are taken from __A instead.  */
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
{
  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
}
     62 
/* Zero-masked vcvtne2ps2bf16: convert __B/__C to bf16; destination
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
{
  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
}
     69 
/* Convert the 4 floats in __A and the 4 floats in __B to 8 bf16 values
   (vcvtne2ps2bf16); __B's results fill the lower half of the destination,
   __A's the upper half.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
{
  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
}
     76 
/* Merge-masked vcvtne2ps2bf16: convert __C/__D to bf16; destination
   elements whose bit in mask __B is clear are taken from __A instead.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
{
  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
}
     83 
/* Zero-masked vcvtne2ps2bf16: convert __B/__C to bf16; destination
   elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
{
  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
}
     90 
     91 /* vcvtneps2bf16 */
     92 
/* Convert the 8 floats in __A to 8 bf16 values in a 128-bit result
   (vcvtneps2bf16), rounding to nearest-even.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtneps_pbh (__m256 __A)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
}
     99 
/* Merge-masked vcvtneps2bf16: convert the 8 floats in __C to bf16;
   result elements whose bit in mask __B is clear come from __A.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
}
    106 
/* Zero-masked vcvtneps2bf16: convert the 8 floats in __B to bf16;
   result elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
}
    113 
/* Convert the 4 floats in __A to 4 bf16 values in the low 64 bits of
   the result (vcvtneps2bf16); the upper 64 bits are zeroed.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtneps_pbh (__m128 __A)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
}
    120 
/* Merge-masked vcvtneps2bf16: convert the 4 floats in __C to bf16;
   result elements whose bit in mask __B is clear come from __A.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
}
    127 
/* Zero-masked vcvtneps2bf16: convert the 4 floats in __B to bf16;
   result elements whose bit in mask __A is clear are zeroed.  */
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
}
    134 
    135 /* vdpbf16ps */
    136 
/* vdpbf16ps: for each 32-bit lane, multiply the bf16 pair from __B with
   the corresponding pair from __C and accumulate both products into the
   float accumulator __A.  */
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
{
  return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
}
    143 
/* Merge-masked vdpbf16ps: accumulate bf16 dot products of __C/__D into
   __A; lanes whose bit in mask __B is clear keep their value from __A.  */
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
{
  return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
}
    150 
/* Zero-masked vdpbf16ps: accumulate bf16 dot products of __C/__D into
   __B; lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
{
  return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
}
    157 
/* vdpbf16ps (128-bit): for each 32-bit lane, multiply the bf16 pair from
   __B with the corresponding pair from __C and accumulate both products
   into the float accumulator __A.  */
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
{
  return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
}
    164 
/* Merge-masked vdpbf16ps: accumulate bf16 dot products of __C/__D into
   __A; lanes whose bit in mask __B is clear keep their value from __A.  */
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
{
  return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
}
    171 
/* Zero-masked vdpbf16ps: accumulate bf16 dot products of __C/__D into
   __B; lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
{
  return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
}
    178 
/* Convert the scalar float __A to bfloat16: place __A in lane 0 of a
   vector, convert with the masked vcvtneps2bf16 builtin (full mask, so
   the undefined merge source is never used), and return the 16-bit
   pattern from element 0.  */
extern __inline __bfloat16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtness_sbh (float __A)
{
  __v4sf __V = {__A, 0, 0, 0};
  __v8hi __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
	       (__v8hi)_mm_undefined_si128 (), (__mmask8)-1);
  return __R[0];
}
    188 
/* Widen the 4 bf16 values in the low 64 bits of __A to floats: each
   16-bit pattern is widened to 32 bits and shifted left by 16, i.e.
   placed in the high half of its float lane (low mantissa bits zero).
   The bits produced by the sign extension are discarded by the shift.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpbh_ps (__m128bh __A)
{
  return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
	 (__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16));
}
    196 
/* Widen the 8 bf16 values in __A to floats: each 16-bit pattern is
   widened to 32 bits and shifted left by 16, i.e. placed in the high
   half of its float lane (low mantissa bits zero).  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpbh_ps (__m128bh __A)
{
  return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
	 (__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16));
}
    204 
/* Zero-masked bf16-to-float widening: as _mm_cvtpbh_ps, but lanes whose
   bit in mask __U is clear are zeroed (via the masked widen step).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
{
  return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
	 (__m128i)_mm_maskz_cvtepi16_epi32 (
	 (__mmask8)__U, (__m128i)__A), 16));
}
    213 
/* Zero-masked bf16-to-float widening: as _mm256_cvtpbh_ps, but lanes
   whose bit in mask __U is clear are zeroed (via the masked widen
   step).  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
{
  return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
	 (__m256i)_mm256_maskz_cvtepi16_epi32 (
	 (__mmask8)__U, (__m128i)__A), 16));
}
    222 
/* Merge-masked bf16-to-float widening: as _mm_cvtpbh_ps, but lanes
   whose bit in mask __U is clear are taken from __S (the masking is
   applied at the final shift step).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
{
  return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 (
	 (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 (
	 (__m128i)__A), 16));
}
    231 
/* Merge-masked bf16-to-float widening: as _mm256_cvtpbh_ps, but lanes
   whose bit in mask __U is clear are taken from __S (the masking is
   applied at the final shift step).  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
{
  return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 (
	 (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 (
	 (__m128i)__A), 16));
}
    240 
    241 #ifdef __DISABLE_AVX512BF16VL__
    242 #undef __DISABLE_AVX512BF16VL__
    243 #pragma GCC pop_options
    244 #endif /* __DISABLE_AVX512BF16VL__ */
    245 
    246 #endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
    247