Home | History | Annotate | Line # | Download | only in i386
      1 /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _IMMINTRIN_H_INCLUDED
     25 #error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef _AVX512BF16INTRIN_H_INCLUDED
     29 #define _AVX512BF16INTRIN_H_INCLUDED
     30 
     31 #ifndef __AVX512BF16__
     32 #pragma GCC push_options
     33 #pragma GCC target("avx512bf16")
     34 #define __DISABLE_AVX512BF16__
     35 #endif /* __AVX512BF16__ */
     36 
/* Internal data types for implementing the intrinsics.
   __v32bh is a 64-byte vector whose element type is `short' (32 lanes
   when short is 16 bits wide).  It is declared without __may_alias__,
   in contrast to the public __m512bh type.  */
typedef short __v32bh __attribute__ ((__vector_size__ (64)));
     39 
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  For that reason this
   64-byte vector of `short' is additionally marked __may_alias__.  */
typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
     43 
     44 /* Convert One BF16 Data to One Single Float Data.  */
     45 extern __inline float
     46 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     47 _mm_cvtsbh_ss (__bfloat16 __A)
     48 {
     49   union{ float __a; unsigned int __b;} __tmp;
     50   __tmp.__b = ((unsigned int)(__A)) << 16;
     51   return __tmp.__a;
     52 }
     53 
     54 /* vcvtne2ps2bf16 */
     55 
     56 extern __inline __m512bh
     57 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     58 _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
     59 {
     60   return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
     61 }
     62 
     63 extern __inline __m512bh
     64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     65 _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
     66 {
     67   return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
     68 }
     69 
     70 extern __inline __m512bh
     71 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     72 _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
     73 {
     74   return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
     75 }
     76 
     77 /* vcvtneps2bf16 */
     78 
     79 extern __inline __m256bh
     80 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     81 _mm512_cvtneps_pbh (__m512 __A)
     82 {
     83   return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
     84 }
     85 
     86 extern __inline __m256bh
     87 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     88 _mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
     89 {
     90   return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
     91 }
     92 
     93 extern __inline __m256bh
     94 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     95 _mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
     96 {
     97   return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
     98 }
     99 
    100 /* vdpbf16ps */
    101 
    102 extern __inline __m512
    103 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    104 _mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
    105 {
    106   return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
    107 }
    108 
    109 extern __inline __m512
    110 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    111 _mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
    112 {
    113   return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
    114 }
    115 
    116 extern __inline __m512
    117 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    118 _mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
    119 {
    120   return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
    121 }
    122 
    123 extern __inline __m512
    124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    125 _mm512_cvtpbh_ps (__m256bh __A)
    126 {
    127   return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 (
    128 	 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16));
    129 }
    130 
    131 extern __inline __m512
    132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    133 _mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
    134 {
    135   return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 (
    136 	 (__m512i)_mm512_maskz_cvtepi16_epi32 (
    137 	 (__mmask16)__U, (__m256i)__A), 16));
    138 }
    139 
    140 extern __inline __m512
    141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    142 _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
    143 {
    144   return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 (
    145 	 (__m512i)__S, (__mmask16)__U,
    146 	 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
    147 }
    148 
    149 #ifdef __DISABLE_AVX512BF16__
    150 #undef __DISABLE_AVX512BF16__
    151 #pragma GCC pop_options
    152 #endif /* __DISABLE_AVX512BF16__ */
    153 
    154 #endif /* _AVX512BF16INTRIN_H_INCLUDED */
    155