Home | History | Annotate | Line # | Download | only in i386
      1  1.1.1.2  mrg /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
      2      1.1  mrg 
      3      1.1  mrg    This file is part of GCC.
      4      1.1  mrg 
      5      1.1  mrg    GCC is free software; you can redistribute it and/or modify
      6      1.1  mrg    it under the terms of the GNU General Public License as published by
      7      1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      8      1.1  mrg    any later version.
      9      1.1  mrg 
     10      1.1  mrg    GCC is distributed in the hope that it will be useful,
     11      1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12      1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13      1.1  mrg    GNU General Public License for more details.
     14      1.1  mrg 
     15      1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     16      1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     17      1.1  mrg    3.1, as published by the Free Software Foundation.
     18      1.1  mrg 
     19      1.1  mrg    You should have received a copy of the GNU General Public License and
     20      1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     21      1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22      1.1  mrg    <http://www.gnu.org/licenses/>.  */
     23      1.1  mrg 
     24      1.1  mrg #ifndef _IMMINTRIN_H_INCLUDED
     25      1.1  mrg #error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
     26      1.1  mrg #endif
     27      1.1  mrg 
     28      1.1  mrg #ifndef _AVX512BF16VLINTRIN_H_INCLUDED
     29      1.1  mrg #define _AVX512BF16VLINTRIN_H_INCLUDED
     30      1.1  mrg 
/* Unless both AVX512VL and AVX512BF16 are already enabled, save the current
   target options, enable the two ISA extensions for the definitions that
   follow, and define __DISABLE_AVX512BF16VL__ so the saved options are
   popped again at the end of this header.  */
#if !(defined (__AVX512VL__) && defined (__AVX512BF16__))
#pragma GCC push_options
#pragma GCC target ("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif /* !(__AVX512VL__ && __AVX512BF16__) */
     36      1.1  mrg 
/* Vector types used internally by the intrinsics: 16-byte and 32-byte
   vectors of short.  */
typedef short __v8bh __attribute__ ((__vector_size__ (16)));
typedef short __v16bh __attribute__ ((__vector_size__ (32)));

/* Public vector types.  They are marked __may_alias__ because the Intel
   API is flexible enough that objects of these types may be accessed
   through other vector types and through their scalar components.  */
typedef short __m128bh __attribute__ ((__may_alias__, __vector_size__ (16)));
typedef short __m256bh __attribute__ ((__may_alias__, __vector_size__ (32)));

/* Scalar type used as the return type of _mm_cvtness_sbh.  */
typedef unsigned short __bfloat16;
     47      1.1  mrg /* vcvtne2ps2bf16 */
     48      1.1  mrg 
     49      1.1  mrg extern __inline __m256bh
     50      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     51      1.1  mrg _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
     52      1.1  mrg {
     53      1.1  mrg   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
     54      1.1  mrg }
     55      1.1  mrg 
     56      1.1  mrg extern __inline __m256bh
     57      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     58      1.1  mrg _mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
     59      1.1  mrg {
     60      1.1  mrg   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
     61      1.1  mrg }
     62      1.1  mrg 
     63      1.1  mrg extern __inline __m256bh
     64      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     65      1.1  mrg _mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
     66      1.1  mrg {
     67      1.1  mrg   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
     68      1.1  mrg }
     69      1.1  mrg 
     70      1.1  mrg extern __inline __m128bh
     71      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     72      1.1  mrg _mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
     73      1.1  mrg {
     74      1.1  mrg   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
     75      1.1  mrg }
     76      1.1  mrg 
     77      1.1  mrg extern __inline __m128bh
     78      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     79      1.1  mrg _mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
     80      1.1  mrg {
     81      1.1  mrg   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
     82      1.1  mrg }
     83      1.1  mrg 
     84      1.1  mrg extern __inline __m128bh
     85      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     86      1.1  mrg _mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
     87      1.1  mrg {
     88      1.1  mrg   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
     89      1.1  mrg }
     90      1.1  mrg 
     91      1.1  mrg /* vcvtneps2bf16 */
     92      1.1  mrg 
     93      1.1  mrg extern __inline __m128bh
     94      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     95      1.1  mrg _mm256_cvtneps_pbh (__m256 __A)
     96      1.1  mrg {
     97      1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
     98      1.1  mrg }
     99      1.1  mrg 
    100      1.1  mrg extern __inline __m128bh
    101      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    102      1.1  mrg _mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
    103      1.1  mrg {
    104      1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
    105      1.1  mrg }
    106      1.1  mrg 
    107      1.1  mrg extern __inline __m128bh
    108      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    109      1.1  mrg _mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
    110      1.1  mrg {
    111      1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
    112      1.1  mrg }
    113      1.1  mrg 
    114      1.1  mrg extern __inline __m128bh
    115      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    116      1.1  mrg _mm_cvtneps_pbh (__m128 __A)
    117      1.1  mrg {
    118      1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
    119      1.1  mrg }
    120      1.1  mrg 
    121      1.1  mrg extern __inline __m128bh
    122      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    123      1.1  mrg _mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
    124      1.1  mrg {
    125      1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
    126      1.1  mrg }
    127      1.1  mrg 
    128      1.1  mrg extern __inline __m128bh
    129      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    130      1.1  mrg _mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
    131      1.1  mrg {
    132      1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
    133      1.1  mrg }
    134      1.1  mrg 
    135      1.1  mrg /* vdpbf16ps */
    136      1.1  mrg 
    137      1.1  mrg extern __inline __m256
    138      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    139      1.1  mrg _mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
    140      1.1  mrg {
    141      1.1  mrg   return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
    142      1.1  mrg }
    143      1.1  mrg 
    144      1.1  mrg extern __inline __m256
    145      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    146      1.1  mrg _mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
    147      1.1  mrg {
    148      1.1  mrg   return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
    149      1.1  mrg }
    150      1.1  mrg 
    151      1.1  mrg extern __inline __m256
    152      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    153      1.1  mrg _mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
    154      1.1  mrg {
    155      1.1  mrg   return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
    156      1.1  mrg }
    157      1.1  mrg 
    158      1.1  mrg extern __inline __m128
    159      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    160      1.1  mrg _mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
    161      1.1  mrg {
    162      1.1  mrg   return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
    163      1.1  mrg }
    164      1.1  mrg 
    165      1.1  mrg extern __inline __m128
    166      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    167      1.1  mrg _mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
    168      1.1  mrg {
    169      1.1  mrg   return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
    170      1.1  mrg }
    171      1.1  mrg 
    172      1.1  mrg extern __inline __m128
    173      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    174      1.1  mrg _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
    175      1.1  mrg {
    176      1.1  mrg   return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
    177      1.1  mrg }
    178      1.1  mrg 
    179  1.1.1.2  mrg extern __inline __bfloat16
    180  1.1.1.2  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    181  1.1.1.2  mrg _mm_cvtness_sbh (float __A)
    182  1.1.1.2  mrg {
    183  1.1.1.2  mrg   __v4sf __V = {__A, 0, 0, 0};
    184  1.1.1.2  mrg   __v8hi __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
    185  1.1.1.2  mrg 	       (__v8hi)_mm_undefined_si128 (), (__mmask8)-1);
    186  1.1.1.2  mrg   return __R[0];
    187  1.1.1.2  mrg }
    188  1.1.1.2  mrg 
    189  1.1.1.2  mrg extern __inline __m128
    190  1.1.1.2  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    191  1.1.1.2  mrg _mm_cvtpbh_ps (__m128bh __A)
    192  1.1.1.2  mrg {
    193  1.1.1.2  mrg   return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
    194  1.1.1.2  mrg 	 (__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16));
    195  1.1.1.2  mrg }
    196  1.1.1.2  mrg 
    197  1.1.1.2  mrg extern __inline __m256
    198  1.1.1.2  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    199  1.1.1.2  mrg _mm256_cvtpbh_ps (__m128bh __A)
    200  1.1.1.2  mrg {
    201  1.1.1.2  mrg   return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
    202  1.1.1.2  mrg 	 (__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16));
    203  1.1.1.2  mrg }
    204  1.1.1.2  mrg 
    205  1.1.1.2  mrg extern __inline __m128
    206  1.1.1.2  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    207  1.1.1.2  mrg _mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
    208  1.1.1.2  mrg {
    209  1.1.1.2  mrg   return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
    210  1.1.1.2  mrg 	 (__m128i)_mm_maskz_cvtepi16_epi32 (
    211  1.1.1.2  mrg 	 (__mmask8)__U, (__m128i)__A), 16));
    212  1.1.1.2  mrg }
    213  1.1.1.2  mrg 
    214  1.1.1.2  mrg extern __inline __m256
    215  1.1.1.2  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    216  1.1.1.2  mrg _mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
    217  1.1.1.2  mrg {
    218  1.1.1.2  mrg   return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
    219  1.1.1.2  mrg 	 (__m256i)_mm256_maskz_cvtepi16_epi32 (
    220  1.1.1.2  mrg 	 (__mmask8)__U, (__m128i)__A), 16));
    221  1.1.1.2  mrg }
    222  1.1.1.2  mrg 
    223  1.1.1.2  mrg extern __inline __m128
    224  1.1.1.2  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    225  1.1.1.2  mrg _mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
    226  1.1.1.2  mrg {
    227  1.1.1.2  mrg   return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 (
    228  1.1.1.2  mrg 	 (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 (
    229  1.1.1.2  mrg 	 (__m128i)__A), 16));
    230  1.1.1.2  mrg }
    231  1.1.1.2  mrg 
    232  1.1.1.2  mrg extern __inline __m256
    233  1.1.1.2  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    234  1.1.1.2  mrg _mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
    235  1.1.1.2  mrg {
    236  1.1.1.2  mrg   return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 (
    237  1.1.1.2  mrg 	 (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 (
    238  1.1.1.2  mrg 	 (__m128i)__A), 16));
    239  1.1.1.2  mrg }
    240  1.1.1.2  mrg 
/* If this header pushed its own target options (signalled by
   __DISABLE_AVX512BF16VL__), drop the marker and pop the saved options.  */
#if defined (__DISABLE_AVX512BF16VL__)
#undef __DISABLE_AVX512BF16VL__
#pragma GCC pop_options
#endif /* defined (__DISABLE_AVX512BF16VL__) */
    245      1.1  mrg 
    246      1.1  mrg #endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
    247