Home | History | Annotate | Line # | Download | only in i386
avx512bf16vlintrin.h revision 1.1.1.1
      1 /* Copyright (C) 2019-2020 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _IMMINTRIN_H_INCLUDED
     25 #error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef _AVX512BF16VLINTRIN_H_INCLUDED
     29 #define _AVX512BF16VLINTRIN_H_INCLUDED
     30 
     31 #if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
     32 #pragma GCC push_options
     33 #pragma GCC target("avx512bf16,avx512vl")
     34 #define __DISABLE_AVX512BF16VL__
#endif /* !__AVX512VL__ || !__AVX512BF16__ */
     36 
/* Header-internal helper types for the intrinsic implementations:
   vectors of short, 16 and 32 bytes wide respectively.  */
typedef short __v8bh __attribute__ ((__vector_size__ (16)));
typedef short __v16bh __attribute__ ((__vector_size__ (32)));
     40 
/* User-visible BF16 vector types (16 and 32 bytes of short elements).
   They are marked __may_alias__ because the Intel API is flexible enough
   that aliasing with other vector types, and with their scalar
   components, has to be permitted.  */
typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
     45 
     46 /* vcvtne2ps2bf16 */
     47 
     48 extern __inline __m256bh
     49 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     50 _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
     51 {
     52   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
     53 }
     54 
     55 extern __inline __m256bh
     56 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     57 _mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
     58 {
     59   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
     60 }
     61 
     62 extern __inline __m256bh
     63 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     64 _mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
     65 {
     66   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
     67 }
     68 
     69 extern __inline __m128bh
     70 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     71 _mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
     72 {
     73   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
     74 }
     75 
     76 extern __inline __m128bh
     77 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     78 _mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
     79 {
     80   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
     81 }
     82 
     83 extern __inline __m128bh
     84 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     85 _mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
     86 {
     87   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
     88 }
     89 
     90 /* vcvtneps2bf16 */
     91 
     92 extern __inline __m128bh
     93 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     94 _mm256_cvtneps_pbh (__m256 __A)
     95 {
     96   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
     97 }
     98 
     99 extern __inline __m128bh
    100 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    101 _mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
    102 {
    103   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
    104 }
    105 
    106 extern __inline __m128bh
    107 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    108 _mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
    109 {
    110   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
    111 }
    112 
    113 extern __inline __m128bh
    114 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    115 _mm_cvtneps_pbh (__m128 __A)
    116 {
    117   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
    118 }
    119 
    120 extern __inline __m128bh
    121 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    122 _mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
    123 {
    124   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
    125 }
    126 
    127 extern __inline __m128bh
    128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    129 _mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
    130 {
    131   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
    132 }
    133 
    134 /* vdpbf16ps */
    135 
    136 extern __inline __m256
    137 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    138 _mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
    139 {
    140   return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
    141 }
    142 
    143 extern __inline __m256
    144 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    145 _mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
    146 {
    147   return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
    148 }
    149 
    150 extern __inline __m256
    151 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    152 _mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
    153 {
    154   return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
    155 }
    156 
    157 extern __inline __m128
    158 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    159 _mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
    160 {
    161   return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
    162 }
    163 
    164 extern __inline __m128
    165 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    166 _mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
    167 {
    168   return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
    169 }
    170 
    171 extern __inline __m128
    172 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    173 _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
    174 {
    175   return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
    176 }
    177 
    178 #ifdef __DISABLE_AVX512BF16VL__
    179 #undef __DISABLE_AVX512BF16VL__
    180 #pragma GCC pop_options
    181 #endif /* __DISABLE_AVX512BF16VL__ */
    182 
    183 #endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
    184