Home | History | Annotate | Line # | Download | only in i386
avx512bf16vlintrin.h revision 1.1
      1  1.1  mrg /* Copyright (C) 2019-2020 Free Software Foundation, Inc.
      2  1.1  mrg 
      3  1.1  mrg    This file is part of GCC.
      4  1.1  mrg 
      5  1.1  mrg    GCC is free software; you can redistribute it and/or modify
      6  1.1  mrg    it under the terms of the GNU General Public License as published by
      7  1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      8  1.1  mrg    any later version.
      9  1.1  mrg 
     10  1.1  mrg    GCC is distributed in the hope that it will be useful,
     11  1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13  1.1  mrg    GNU General Public License for more details.
     14  1.1  mrg 
     15  1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     16  1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     17  1.1  mrg    3.1, as published by the Free Software Foundation.
     18  1.1  mrg 
     19  1.1  mrg    You should have received a copy of the GNU General Public License and
     20  1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     21  1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22  1.1  mrg    <http://www.gnu.org/licenses/>.  */
     23  1.1  mrg 
     /* Refuse direct inclusion: unless _IMMINTRIN_H_INCLUDED is already
        defined, compilation stops with the diagnostic below.
        NOTE(review): that macro is presumably defined by <immintrin.h>
        before it pulls in this header -- it is not defined here.  */
     24  1.1  mrg #ifndef _IMMINTRIN_H_INCLUDED
     25  1.1  mrg #error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
     26  1.1  mrg #endif
     27  1.1  mrg 
     28  1.1  mrg #ifndef _AVX512BF16VLINTRIN_H_INCLUDED
     29  1.1  mrg #define _AVX512BF16VLINTRIN_H_INCLUDED
     30  1.1  mrg 
     /* If either __AVX512VL__ or __AVX512BF16__ is not defined, save the
        current target options, switch the target to "avx512bf16,avx512vl"
        for the definitions that follow, and define
        __DISABLE_AVX512BF16VL__ as a marker that the options were changed
        (it is tested again at the end of this header before
        pop_options).  */
     31  1.1  mrg #if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
     32  1.1  mrg #pragma GCC push_options
     33  1.1  mrg #pragma GCC target("avx512bf16,avx512vl")
     34  1.1  mrg #define __DISABLE_AVX512BF16VL__
     35  1.1  mrg #endif /* !__AVX512VL__ || !__AVX512BF16__ */
     36  1.1  mrg 
     37  1.1  mrg /* Internal data types for implementing the intrinsics.  */
     /* __v16bh is a 32-byte vector of short and __v8bh a 16-byte vector of
        short.  NOTE(review): neither name is referenced by the wrappers
        visible in this header; the wrappers use __m256bh/__m128bh.  */
     38  1.1  mrg typedef short __v16bh __attribute__ ((__vector_size__ (32)));
     39  1.1  mrg typedef short __v8bh __attribute__ ((__vector_size__ (16)));
     40  1.1  mrg 
     41  1.1  mrg /* The Intel API is flexible enough that we must allow aliasing with other
     42  1.1  mrg    vector types, and their scalar components.  */
     /* Public vector types used in the intrinsic signatures below:
        __m256bh is a 32-byte and __m128bh a 16-byte vector of short, both
        declared with __may_alias__.  NOTE(review): the elements are
        presumably raw bf16 bit patterns stored in shorts -- confirm.  */
     43  1.1  mrg typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
     44  1.1  mrg typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
     45  1.1  mrg 
     46  1.1  mrg /* vcvtne2ps2bf16 */
     47  1.1  mrg 
     /* Forward __A and __B, in that order, to
        __builtin_ia32_cvtne2ps2bf16_v16hi and return the builtin's result
        cast to __m256bh.
        NOTE(review): presumably the unmasked 256-bit form of
        vcvtne2ps2bf16 (two float vectors narrowed into one bf16 vector);
        confirm lane placement against the Intel intrinsics reference.  */
     48  1.1  mrg extern __inline __m256bh
     49  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     50  1.1  mrg _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
     51  1.1  mrg {
     52  1.1  mrg   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
     53  1.1  mrg }
     54  1.1  mrg 
     /* Masked variant.  Note the argument reordering: the builtin
        __builtin_ia32_cvtne2ps2bf16_v16hi_mask receives the two sources
        (__C, __D) first, then __A, then the 16-bit mask __B.  The result
        is cast to __m256bh.
        NOTE(review): by the usual AVX-512 merge-masking convention __A
        would supply the lanes whose bit in __B is clear -- confirm.  */
     55  1.1  mrg extern __inline __m256bh
     56  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     57  1.1  mrg _mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
     58  1.1  mrg {
     59  1.1  mrg   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
     60  1.1  mrg }
     61  1.1  mrg 
     /* Zero-masking variant (per the "maskz" name).  The builtin
        __builtin_ia32_cvtne2ps2bf16_v16hi_maskz receives the sources
        (__B, __C) first and the mask __A last; its result is cast to
        __m256bh.
        NOTE(review): presumably lanes whose bit in __A is clear are
        zeroed -- confirm.  */
     62  1.1  mrg extern __inline __m256bh
     63  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     64  1.1  mrg _mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
     65  1.1  mrg {
     66  1.1  mrg   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
     67  1.1  mrg }
     68  1.1  mrg 
     /* 128-bit counterpart of _mm256_cvtne2ps_pbh: forward __A and __B,
        in that order, to __builtin_ia32_cvtne2ps2bf16_v8hi and return the
        result cast to __m128bh.
        NOTE(review): presumably the unmasked 128-bit form of
        vcvtne2ps2bf16 -- confirm.  */
     69  1.1  mrg extern __inline __m128bh
     70  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     71  1.1  mrg _mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
     72  1.1  mrg {
     73  1.1  mrg   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
     74  1.1  mrg }
     75  1.1  mrg 
     /* Masked 128-bit variant.  The builtin
        __builtin_ia32_cvtne2ps2bf16_v8hi_mask receives the sources
        (__C, __D) first, then __A, then the 8-bit mask __B; its result is
        cast to __m128bh.
        NOTE(review): __A is presumably the merge source for lanes whose
        bit in __B is clear -- confirm.  */
     76  1.1  mrg extern __inline __m128bh
     77  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     78  1.1  mrg _mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
     79  1.1  mrg {
     80  1.1  mrg   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
     81  1.1  mrg }
     82  1.1  mrg 
     /* Zero-masking 128-bit variant (per the "maskz" name).  The builtin
        __builtin_ia32_cvtne2ps2bf16_v8hi_maskz receives the sources
        (__B, __C) first and the mask __A last; its result is cast to
        __m128bh.
        NOTE(review): presumably lanes whose bit in __A is clear are
        zeroed -- confirm.  */
     83  1.1  mrg extern __inline __m128bh
     84  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     85  1.1  mrg _mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
     86  1.1  mrg {
     87  1.1  mrg   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
     88  1.1  mrg }
     89  1.1  mrg 
     90  1.1  mrg /* vcvtneps2bf16 */
     91  1.1  mrg 
     /* Forward the single source __A to __builtin_ia32_cvtneps2bf16_v8sf
        and return the result cast to __m128bh.  Note that the return type
        is the 16-byte __m128bh even though the parameter is a __m256.
        NOTE(review): presumably the unmasked form of vcvtneps2bf16 for a
        256-bit float source -- confirm.  */
     92  1.1  mrg extern __inline __m128bh
     93  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     94  1.1  mrg _mm256_cvtneps_pbh (__m256 __A)
     95  1.1  mrg {
     96  1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
     97  1.1  mrg }
     98  1.1  mrg 
     /* Masked variant.  The builtin __builtin_ia32_cvtneps2bf16_v8sf_mask
        receives the source __C first, then __A, then the 8-bit mask __B;
        its result is cast to __m128bh.
        NOTE(review): __A is presumably the merge source for lanes whose
        bit in __B is clear -- confirm.  */
     99  1.1  mrg extern __inline __m128bh
    100  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    101  1.1  mrg _mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
    102  1.1  mrg {
    103  1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
    104  1.1  mrg }
    105  1.1  mrg 
    /* Zero-masking variant (per the "maskz" name).  The builtin
       __builtin_ia32_cvtneps2bf16_v8sf_maskz receives the source __B
       first and the mask __A second; its result is cast to __m128bh.
       NOTE(review): presumably lanes whose bit in __A is clear are
       zeroed -- confirm.  */
    106  1.1  mrg extern __inline __m128bh
    107  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    108  1.1  mrg _mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
    109  1.1  mrg {
    110  1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
    111  1.1  mrg }
    112  1.1  mrg 
    /* 128-bit-source counterpart of _mm256_cvtneps_pbh: forward __A to
       __builtin_ia32_cvtneps2bf16_v4sf and return the result cast to
       __m128bh.
       NOTE(review): how the converted elements are placed within the
       16-byte result is not visible here -- check the Intel intrinsics
       reference.  */
    113  1.1  mrg extern __inline __m128bh
    114  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    115  1.1  mrg _mm_cvtneps_pbh (__m128 __A)
    116  1.1  mrg {
    117  1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
    118  1.1  mrg }
    119  1.1  mrg 
    /* Masked variant.  The builtin __builtin_ia32_cvtneps2bf16_v4sf_mask
       receives the source __C first, then __A, then the mask __B; its
       result is cast to __m128bh.
       NOTE(review): __A is presumably the merge source for lanes whose
       bit in __B is clear; how many bits of the __mmask8 are significant
       is not visible here -- confirm.  */
    120  1.1  mrg extern __inline __m128bh
    121  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    122  1.1  mrg _mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
    123  1.1  mrg {
    124  1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
    125  1.1  mrg }
    126  1.1  mrg 
    /* Zero-masking variant (per the "maskz" name).  The builtin
       __builtin_ia32_cvtneps2bf16_v4sf_maskz receives the source __B
       first and the mask __A second; its result is cast to __m128bh.
       NOTE(review): presumably lanes whose bit in __A is clear are
       zeroed -- confirm.  */
    127  1.1  mrg extern __inline __m128bh
    128  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    129  1.1  mrg _mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
    130  1.1  mrg {
    131  1.1  mrg   return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
    132  1.1  mrg }
    133  1.1  mrg 
    134  1.1  mrg /* vdpbf16ps */
    135  1.1  mrg 
    /* Forward __A, __B and __C, in that order, to
       __builtin_ia32_dpbf16ps_v8sf and return the result cast to __m256.
       NOTE(review): presumably the unmasked 256-bit form of vdpbf16ps,
       with __A as the float accumulator and __B/__C as the bf16 operands
       of a dot product -- confirm against the Intel intrinsics
       reference.  */
    136  1.1  mrg extern __inline __m256
    137  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    138  1.1  mrg _mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
    139  1.1  mrg {
    140  1.1  mrg   return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
    141  1.1  mrg }
    142  1.1  mrg 
    /* Masked variant.  The builtin __builtin_ia32_dpbf16ps_v8sf_mask
       receives (__A, __C, __D, __B): the mask __B, second in this
       signature, is passed last.  The result is cast to __m256.
       NOTE(review): presumably lanes whose bit in __B is clear keep the
       value from __A -- confirm.  */
    143  1.1  mrg extern __inline __m256
    144  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    145  1.1  mrg _mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
    146  1.1  mrg {
    147  1.1  mrg   return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
    148  1.1  mrg }
    149  1.1  mrg 
    /* Zero-masking variant (per the "maskz" name).  The builtin
       __builtin_ia32_dpbf16ps_v8sf_maskz receives (__B, __C, __D, __A):
       the mask __A, first in this signature, is passed last.  The result
       is cast to __m256.
       NOTE(review): presumably lanes whose bit in __A is clear are
       zeroed -- confirm.  */
    150  1.1  mrg extern __inline __m256
    151  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    152  1.1  mrg _mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
    153  1.1  mrg {
    154  1.1  mrg   return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
    155  1.1  mrg }
    156  1.1  mrg 
    /* 128-bit counterpart of _mm256_dpbf16_ps: forward __A, __B and __C,
       in that order, to __builtin_ia32_dpbf16ps_v4sf and return the
       result cast to __m128.
       NOTE(review): presumably the unmasked 128-bit form of vdpbf16ps
       with __A as the accumulator -- confirm.  */
    157  1.1  mrg extern __inline __m128
    158  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    159  1.1  mrg _mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
    160  1.1  mrg {
    161  1.1  mrg   return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
    162  1.1  mrg }
    163  1.1  mrg 
    /* Masked 128-bit variant.  The builtin
       __builtin_ia32_dpbf16ps_v4sf_mask receives (__A, __C, __D, __B):
       the mask __B, second in this signature, is passed last.  The
       result is cast to __m128.
       NOTE(review): presumably lanes whose bit in __B is clear keep the
       value from __A -- confirm.  */
    164  1.1  mrg extern __inline __m128
    165  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    166  1.1  mrg _mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
    167  1.1  mrg {
    168  1.1  mrg   return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
    169  1.1  mrg }
    170  1.1  mrg 
    /* Zero-masking 128-bit variant (per the "maskz" name).  The builtin
       __builtin_ia32_dpbf16ps_v4sf_maskz receives (__B, __C, __D, __A):
       the mask __A, first in this signature, is passed last.  The result
       is cast to __m128.
       NOTE(review): presumably lanes whose bit in __A is clear are
       zeroed -- confirm.  */
    171  1.1  mrg extern __inline __m128
    172  1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    173  1.1  mrg _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
    174  1.1  mrg {
    175  1.1  mrg   return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
    176  1.1  mrg }
    177  1.1  mrg 
    /* Undo the target-option change made near the top of this header:
       only when __DISABLE_AVX512BF16VL__ was defined there (i.e. the
       options were pushed), drop the marker macro and pop the saved
       options.  */
    178  1.1  mrg #ifdef __DISABLE_AVX512BF16VL__
    179  1.1  mrg #undef __DISABLE_AVX512BF16VL__
    180  1.1  mrg #pragma GCC pop_options
    181  1.1  mrg #endif /* __DISABLE_AVX512BF16VL__ */
    182  1.1  mrg 
    183  1.1  mrg #endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
    184