/* Copyright (C) 2019-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FP16INTRIN_H_INCLUDED
#define __AVX512FP16INTRIN_H_INCLUDED

#ifndef __AVX512FP16__
#pragma GCC push_options
#pragma GCC target("avx512fp16")
#define __DISABLE_AVX512FP16__
#endif /* __AVX512FP16__ */

/* Internal data types for implementing the intrinsics.  */
typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type.  */
typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16),
					   __may_alias__, __aligned__ (1)));
typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32),
					   __may_alias__, __aligned__ (1)));
typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64),
					   __may_alias__, __aligned__ (1)));
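
/* Usage note added to this listing (not part of the upstream header):
   __m128h, __m256h and __m512h hold 8, 16 and 32 _Float16 lanes
   respectively; the *_u variants relax the alignment requirement to one
   byte and back the loadu/storeu intrinsics below.  A minimal sketch:

     _Float16 buf[8];                        // any alignment
     __m128h v = *(const __m128h_u *) buf;   // unaligned load is safe
     // *(const __m128h *) buf would require 16-byte alignment.  */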

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
	    _Float16 __A4, _Float16 __A3, _Float16 __A2,
	    _Float16 __A1, _Float16 __A0)
{
  return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
					  __A4, __A5, __A6, __A7 };
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
	       _Float16 __A12, _Float16 __A11, _Float16 __A10,
	       _Float16 __A9, _Float16 __A8, _Float16 __A7,
	       _Float16 __A6, _Float16 __A5, _Float16 __A4,
	       _Float16 __A3, _Float16 __A2, _Float16 __A1,
	       _Float16 __A0)
{
  return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
					   __A4, __A5, __A6, __A7,
					   __A8, __A9, __A10, __A11,
					   __A12, __A13, __A14, __A15 };
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
	       _Float16 __A28, _Float16 __A27, _Float16 __A26,
	       _Float16 __A25, _Float16 __A24, _Float16 __A23,
	       _Float16 __A22, _Float16 __A21, _Float16 __A20,
	       _Float16 __A19, _Float16 __A18, _Float16 __A17,
	       _Float16 __A16, _Float16 __A15, _Float16 __A14,
	       _Float16 __A13, _Float16 __A12, _Float16 __A11,
	       _Float16 __A10, _Float16 __A9, _Float16 __A8,
	       _Float16 __A7, _Float16 __A6, _Float16 __A5,
	       _Float16 __A4, _Float16 __A3, _Float16 __A2,
	       _Float16 __A1, _Float16 __A0)
{
  return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
					   __A4, __A5, __A6, __A7,
					   __A8, __A9, __A10, __A11,
					   __A12, __A13, __A14, __A15,
					   __A16, __A17, __A18, __A19,
					   __A20, __A21, __A22, __A23,
					   __A24, __A25, __A26, __A27,
					   __A28, __A29, __A30, __A31 };
}

/* Create vectors of elements in the reversed order from _mm_set_ph,
   _mm256_set_ph and _mm512_set_ph functions.  */

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
	     _Float16 __A3, _Float16 __A4, _Float16 __A5,
	     _Float16 __A6, _Float16 __A7)
{
  return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
		_Float16 __A3, _Float16 __A4, _Float16 __A5,
		_Float16 __A6, _Float16 __A7, _Float16 __A8,
		_Float16 __A9, _Float16 __A10, _Float16 __A11,
		_Float16 __A12, _Float16 __A13, _Float16 __A14,
		_Float16 __A15)
{
  return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
			__A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
			__A0);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
		_Float16 __A3, _Float16 __A4, _Float16 __A5,
		_Float16 __A6, _Float16 __A7, _Float16 __A8,
		_Float16 __A9, _Float16 __A10, _Float16 __A11,
		_Float16 __A12, _Float16 __A13, _Float16 __A14,
		_Float16 __A15, _Float16 __A16, _Float16 __A17,
		_Float16 __A18, _Float16 __A19, _Float16 __A20,
		_Float16 __A21, _Float16 __A22, _Float16 __A23,
		_Float16 __A24, _Float16 __A25, _Float16 __A26,
		_Float16 __A27, _Float16 __A28, _Float16 __A29,
		_Float16 __A30, _Float16 __A31)
{
  return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
			__A24, __A23, __A22, __A21, __A20, __A19, __A18,
			__A17, __A16, __A15, __A14, __A13, __A12, __A11,
			__A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
			__A2, __A1, __A0);
}
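
/* Usage sketch added to this listing (an illustration, not upstream
   documentation): _mm_set_ph takes elements from the highest lane down
   to lane 0, while _mm_setr_ph takes them in lane (memory) order, so

     __m128h a = _mm_set_ph  (8.0f, 7.0f, 6.0f, 5.0f,
                              4.0f, 3.0f, 2.0f, 1.0f);
     __m128h b = _mm_setr_ph (1.0f, 2.0f, 3.0f, 4.0f,
                              5.0f, 6.0f, 7.0f, 8.0f);

   produce the same vector: a[0] == b[0] == 1.0.  */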

/* Broadcast _Float16 to vector.  */

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ph (_Float16 __A)
{
  return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ph (_Float16 __A)
{
  return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ph (_Float16 __A)
{
  return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A);
}

/* Create a vector with all zeros.  */

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ph (void)
{
  return _mm_set1_ph (0.0f);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ph (void)
{
  return _mm256_set1_ph (0.0f);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ph (void)
{
  return _mm512_set1_ph (0.0f);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ph (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m128h __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_ph (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m256h __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ph (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m512h __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_h (__m128h __A)
{
  return __A[0];
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsh_h (__m256h __A)
{
  return __A[0];
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsh_h (__m512h __A)
{
  return __A[0];
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_ps (__m512h __a)
{
  return (__m512) __a;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_pd (__m512h __a)
{
  return (__m512d) __a;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_si512 (__m512h __a)
{
  return (__m512i) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph128 (__m512h __A)
{
  union
  {
    __m128h __a[4];
    __m512h __v;
  } __u = { .__v = __A };
  return __u.__a[0];
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph256 (__m512h __A)
{
  union
  {
    __m256h __a[2];
    __m512h __v;
  } __u = { .__v = __A };
  return __u.__a[0];
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph128_ph512 (__m128h __A)
{
  union
  {
    __m128h __a[4];
    __m512h __v;
  } __u;
  __u.__a[0] = __A;
  return __u.__v;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph256_ph512 (__m256h __A)
{
  union
  {
    __m256h __a[2];
    __m512h __v;
  } __u;
  __u.__a[0] = __A;
  return __u.__v;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph128_ph512 (__m128h __A)
{
  return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
				       (__m128) __A, 0);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph256_ph512 (__m256h __A)
{
  return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
				       (__m256d) __A, 0);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_ph (__m512 __a)
{
  return (__m512h) __a;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ph (__m512d __a)
{
  return (__m512h) __a;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ph (__m512i __a)
{
  return (__m512h) __a;
}
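
/* Note added to this listing: the _mm512_castph* functions above are
   free bit-pattern reinterpretations, and _mm512_castph128_ph512 /
   _mm512_castph256_ph512 leave the upper lanes undefined (only the low
   part of the union is written), whereas the _mm512_zextph*_ph512
   variants explicitly zero them.  A sketch:

     __m128h lo = _mm_set1_ph (1.0f);
     __m512h a = _mm512_castph128_ph512 (lo);  // lanes 8..31 undefined
     __m512h b = _mm512_zextph128_ph512 (lo);  // lanes 8..31 are 0.0  */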

/* Create a vector with element 0 as F and the rest zero.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sh (_Float16 __F)
{
  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
}

/* Create a vector with element 0 as *P and the rest zero.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sh (void const *__P)
{
  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
		     *(_Float16 const *) __P);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ph (void const *__P)
{
  return *(const __m512h *) __P;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ph (void const *__P)
{
  return *(const __m256h *) __P;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ph (void const *__P)
{
  return *(const __m128h *) __P;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_ph (void const *__P)
{
  return *(const __m512h_u *) __P;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ph (void const *__P)
{
  return *(const __m256h_u *) __P;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ph (void const *__P)
{
  return *(const __m128h_u *) __P;
}

/* Stores the lower _Float16 value.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sh (void *__P, __m128h __A)
{
  *(_Float16 *) __P = ((__v8hf)__A)[0];
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ph (void *__P, __m512h __A)
{
   *(__m512h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ph (void *__P, __m256h __A)
{
   *(__m256h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ph (void *__P, __m128h __A)
{
   *(__m128h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_ph (void *__P, __m512h __A)
{
   *(__m512h_u *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ph (void *__P, __m256h __A)
{
   *(__m256h_u *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ph (void *__P, __m128h __A)
{
   *(__m128h_u *) __P = __A;
}
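
/* Note added to this listing: the plain load/store intrinsics above
   dereference an aligned vector pointer, so the address must be 64-,
   32- or 16-byte aligned for the 512-, 256- and 128-bit forms
   respectively; the loadu/storeu forms go through the __aligned__ (1)
   types and accept any address.  A minimal sketch:

     _Float16 a[32] __attribute__ ((aligned (64)));
     _Float16 b[33];
     __m512h v = _mm512_load_ph (a);    // a must be 64-byte aligned
     _mm512_storeu_ph (b + 1, v);       // any alignment is fine  */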

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_abs_ph (__m512h __A)
{
  return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
				      (__m512i) __A);
}
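
/* Note added to this listing: the absolute value above is a pure
   bit-mask operation; 0x7FFF7FFF clears the sign bit of each of the
   two _Float16 values packed in every 32-bit lane, e.g.
   _mm512_abs_ph (_mm512_set1_ph (-2.5f)) yields 2.5 in every lane.  */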

/* Intrinsics v[add,sub,mul,div]ph.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A + (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_addph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A - (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_subph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A * (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_mulph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A / (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_divph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}
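
/* Note added to this listing: for the masked arithmetic above,

     __m512h r = _mm512_mask_add_ph (src, k, a, b);

   computes a[i] + b[i] in every lane whose bit is set in k and copies
   src[i] into the remaining lanes; the maskz forms write 0.0 to the
   unselected lanes instead.  */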

#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_addph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_addph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_subph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_subph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_mulph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_mulph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_divph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_divph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}
#else
#define _mm512_add_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_addph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_add_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_add_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_addph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_sub_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_subph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_sub_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_sub_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_subph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_mul_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_mul_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_mul_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_div_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_divph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_div_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_div_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_divph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))
#endif  /* __OPTIMIZE__  */
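
/* Note added to this listing: the *_round_* forms take a rounding/SAE
   control as their last argument, normally one of the _MM_FROUND_*
   constants provided elsewhere in <immintrin.h>, e.g.

     __m512h r = _mm512_add_round_ph (a, b,
                                      _MM_FROUND_TO_NEAREST_INT
                                      | _MM_FROUND_NO_EXC);

   Without __OPTIMIZE__ the macro fallbacks above are used instead of
   the inline wrappers, so the rounding argument still reaches the
   builtin as a compile-time constant.  */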

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conj_pch (__m512h __A)
{
  return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
				   (__v16sf) __W,
				   (__mmask16) __U);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
				   (__v16sf) _mm512_setzero_ps (),
				   (__mmask16) __U);
}
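
/* Note added to this listing: _mm512_conj_pch treats each 32-bit lane
   as one complex half-precision value (real part in the low _Float16,
   imaginary part in the high _Float16) and flips bit 31, i.e. the sign
   of the imaginary part, so 1.0 + 2.0i becomes 1.0 - 2.0i.  The masked
   variants merge or zero whole complex elements, which is why they use
   a __mmask16 rather than a __mmask32.  */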

/* Intrinsics of v[add,sub,mul,div]sh.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sh (__m128h __A, __m128h __B)
{
  __A[0] += __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
				    __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sh (__m128h __A, __m128h __B)
{
  __A[0] -= __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
				    __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sh (__m128h __A, __m128h __B)
{
  __A[0] *= __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sh (__m128h __A, __m128h __B)
{
  __A[0] /= __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
				    __A);
}

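/* Note added to this listing: the scalar (_sh) operations above work
   on element 0 only and return the first operand with its low lane
   replaced, so the upper seven lanes of the result come from __A.
   For example:

     __m128h a = _mm_set_sh (3.0f);   // {3, 0, 0, 0, 0, 0, 0, 0}
     __m128h b = _mm_set_sh (4.0f);
     __m128h r = _mm_add_sh (a, b);   // r[0] == 7.0, r[1..7] from a  */
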
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_addsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_addsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_subsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_subsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_mulsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_mulsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_divsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_divsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}
#else
#define _mm_add_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_add_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_add_round_sh(A, B, C, D)			\
  ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C),		\
					     _mm_setzero_ph (),	\
					     (A), (D)))

#define _mm_sub_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_sub_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_sub_round_sh(A, B, C, D)			\
  ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C),		\
					     _mm_setzero_ph (),	\
					     (A), (D)))

#define _mm_mul_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_mul_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_mul_round_sh(A, B, C, D)			\
  ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C),		\
					     _mm_setzero_ph (),	\
					     (A), (D)))

#define _mm_div_round_sh(A, B, C)					\
  ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B),			\
					     _mm_setzero_ph (),		\
					     (__mmask8)-1, (C)))

#define _mm_mask_div_round_sh(A, B, C, D, E)				\
  ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_div_round_sh(A, B, C, D)			\
  ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C),		\
					     _mm_setzero_ph (),	\
					     (A), (D)))
#endif /* __OPTIMIZE__ */

/* Intrinsic vmaxph vminph.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_ph (__m512h __A, __m512h __B)
{
  return __builtin_ia32_maxph512_mask (__A, __B,
				       _mm512_setzero_ph (),
				       (__mmask32) -1);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_maxph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_ph (__m512h __A, __m512h __B)
{
  return __builtin_ia32_minph512_mask (__A, __B,
				       _mm512_setzero_ph (),
				       (__mmask32) -1);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_minph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}
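
/* Note added to this listing: the packed max/min intrinsics above wrap
   the vmaxph/vminph builtins; the unmasked forms simply pass an
   all-ones mask.  A sketch of the masked form:

     __m512h r = _mm512_mask_max_ph (src, k, a, b);
     // lane i: max of a[i] and b[i] if bit i of k is set, else src[i]  */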
   1062  1.1  mrg 
   1063  1.1  mrg #ifdef __OPTIMIZE__
   1064  1.1  mrg extern __inline __m512h
   1065  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1066  1.1  mrg _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
   1067  1.1  mrg {
   1068  1.1  mrg   return __builtin_ia32_maxph512_mask_round (__A, __B,
   1069  1.1  mrg 					     _mm512_setzero_ph (),
   1070  1.1  mrg 					     (__mmask32) -1, __C);
   1071  1.1  mrg }
   1072  1.1  mrg 
   1073  1.1  mrg extern __inline __m512h
   1074  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1075  1.1  mrg _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
   1076  1.1  mrg 			  __m512h __D, const int __E)
   1077  1.1  mrg {
   1078  1.1  mrg   return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
   1079  1.1  mrg }
   1080  1.1  mrg 
   1081  1.1  mrg extern __inline __m512h
   1082  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1083  1.1  mrg _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
   1084  1.1  mrg 			   const int __D)
   1085  1.1  mrg {
   1086  1.1  mrg   return __builtin_ia32_maxph512_mask_round (__B, __C,
   1087  1.1  mrg 					     _mm512_setzero_ph (),
   1088  1.1  mrg 					     __A, __D);
   1089  1.1  mrg }
   1090  1.1  mrg 
   1091  1.1  mrg extern __inline __m512h
   1092  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1093  1.1  mrg _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
   1094  1.1  mrg {
   1095  1.1  mrg   return __builtin_ia32_minph512_mask_round (__A, __B,
   1096  1.1  mrg 					     _mm512_setzero_ph (),
   1097  1.1  mrg 					     (__mmask32) -1, __C);
   1098  1.1  mrg }
   1099  1.1  mrg 
   1100  1.1  mrg extern __inline __m512h
   1101  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1102  1.1  mrg _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
   1103  1.1  mrg 			  __m512h __D, const int __E)
   1104  1.1  mrg {
   1105  1.1  mrg   return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
   1106  1.1  mrg }
   1107  1.1  mrg 
   1108  1.1  mrg extern __inline __m512h
   1109  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1110  1.1  mrg _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
   1111  1.1  mrg 			   const int __D)
   1112  1.1  mrg {
   1113  1.1  mrg   return __builtin_ia32_minph512_mask_round (__B, __C,
   1114  1.1  mrg 					     _mm512_setzero_ph (),
   1115  1.1  mrg 					     __A, __D);
   1116  1.1  mrg }
   1117  1.1  mrg 
   1118  1.1  mrg #else
   1119  1.1  mrg #define _mm512_max_round_ph(A, B, C)				\
   1120  1.1  mrg   (__builtin_ia32_maxph512_mask_round ((A), (B),		\
   1121  1.1  mrg 				       _mm512_setzero_ph (),	\
   1122  1.1  mrg 				       (__mmask32)-1, (C)))
   1123  1.1  mrg 
   1124  1.1  mrg #define _mm512_mask_max_round_ph(A, B, C, D, E)				\
   1125  1.1  mrg   (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
   1126  1.1  mrg 
   1127  1.1  mrg #define _mm512_maskz_max_round_ph(A, B, C, D)			\
   1128  1.1  mrg   (__builtin_ia32_maxph512_mask_round ((B), (C),		\
   1129  1.1  mrg 				       _mm512_setzero_ph (),	\
   1130  1.1  mrg 				       (A), (D)))
   1131  1.1  mrg 
   1132  1.1  mrg #define _mm512_min_round_ph(A, B, C)				\
   1133  1.1  mrg   (__builtin_ia32_minph512_mask_round ((A), (B),		\
   1134  1.1  mrg 				       _mm512_setzero_ph (),	\
   1135  1.1  mrg 				       (__mmask32)-1, (C)))
   1136  1.1  mrg 
   1137  1.1  mrg #define _mm512_mask_min_round_ph(A, B, C, D, E)				\
   1138  1.1  mrg   (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
   1139  1.1  mrg 
   1140  1.1  mrg #define _mm512_maskz_min_round_ph(A, B, C, D)			\
   1141  1.1  mrg   (__builtin_ia32_minph512_mask_round ((B), (C),		\
   1142  1.1  mrg 				       _mm512_setzero_ph (),	\
   1143  1.1  mrg 				       (A), (D)))
   1144  1.1  mrg #endif /* __OPTIMIZE__ */
   1145  1.1  mrg 
   1146  1.1  mrg /* Intrinsic vmaxsh vminsh.  */
   1147  1.1  mrg extern __inline __m128h
   1148  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1149  1.1  mrg _mm_max_sh (__m128h __A, __m128h __B)
   1150  1.1  mrg {
   1151  1.1  mrg   __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
   1152  1.1  mrg   return __A;
   1153  1.1  mrg }
   1154  1.1  mrg 
   1155  1.1  mrg extern __inline __m128h
   1156  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1157  1.1  mrg _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   1158  1.1  mrg {
   1159  1.1  mrg   return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
   1160  1.1  mrg }
   1161  1.1  mrg 
   1162  1.1  mrg extern __inline __m128h
   1163  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1164  1.1  mrg _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
   1165  1.1  mrg {
   1166  1.1  mrg   return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
   1167  1.1  mrg 				    __A);
   1168  1.1  mrg }
   1169  1.1  mrg 
   1170  1.1  mrg extern __inline __m128h
   1171  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1172  1.1  mrg _mm_min_sh (__m128h __A, __m128h __B)
   1173  1.1  mrg {
   1174  1.1  mrg   __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
   1175  1.1  mrg   return __A;
   1176  1.1  mrg }
   1177  1.1  mrg 
   1178  1.1  mrg extern __inline __m128h
   1179  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1180  1.1  mrg _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   1181  1.1  mrg {
   1182  1.1  mrg   return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
   1183  1.1  mrg }
   1184  1.1  mrg 
   1185  1.1  mrg extern __inline __m128h
   1186  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1187  1.1  mrg _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
   1188  1.1  mrg {
   1189  1.1  mrg   return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
   1190  1.1  mrg 				    __A);
   1191  1.1  mrg }
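
/* Example (illustrative sketch): given __m128h values x, lo and hi, the
   scalar forms above clamp the low _Float16 of x into [lo[0], hi[0]];
   the upper seven elements are copied through from x:

     __m128h clamped = _mm_min_sh (_mm_max_sh (x, lo), hi);
*/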
   1192  1.1  mrg 
   1193  1.1  mrg #ifdef __OPTIMIZE__
   1194  1.1  mrg extern __inline __m128h
   1195  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1196  1.1  mrg _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
   1197  1.1  mrg {
   1198  1.1  mrg   return __builtin_ia32_maxsh_mask_round (__A, __B,
   1199  1.1  mrg 					  _mm_setzero_ph (),
   1200  1.1  mrg 					  (__mmask8) -1, __C);
   1201  1.1  mrg }
   1202  1.1  mrg 
   1203  1.1  mrg extern __inline __m128h
   1204  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1205  1.1  mrg _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
   1206  1.1  mrg 		       __m128h __D, const int __E)
   1207  1.1  mrg {
   1208  1.1  mrg   return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
   1209  1.1  mrg }
   1210  1.1  mrg 
   1211  1.1  mrg extern __inline __m128h
   1212  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1213  1.1  mrg _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
   1214  1.1  mrg 			const int __D)
   1215  1.1  mrg {
   1216  1.1  mrg   return __builtin_ia32_maxsh_mask_round (__B, __C,
   1217  1.1  mrg 					  _mm_setzero_ph (),
   1218  1.1  mrg 					  __A, __D);
   1219  1.1  mrg }
   1220  1.1  mrg 
   1221  1.1  mrg extern __inline __m128h
   1222  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1223  1.1  mrg _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
   1224  1.1  mrg {
   1225  1.1  mrg   return __builtin_ia32_minsh_mask_round (__A, __B,
   1226  1.1  mrg 					  _mm_setzero_ph (),
   1227  1.1  mrg 					  (__mmask8) -1, __C);
   1228  1.1  mrg }
   1229  1.1  mrg 
   1230  1.1  mrg extern __inline __m128h
   1231  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1232  1.1  mrg _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
   1233  1.1  mrg 		       __m128h __D, const int __E)
   1234  1.1  mrg {
   1235  1.1  mrg   return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
   1236  1.1  mrg }
   1237  1.1  mrg 
   1238  1.1  mrg extern __inline __m128h
   1239  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1240  1.1  mrg _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
   1241  1.1  mrg 			const int __D)
   1242  1.1  mrg {
   1243  1.1  mrg   return __builtin_ia32_minsh_mask_round (__B, __C,
   1244  1.1  mrg 					  _mm_setzero_ph (),
   1245  1.1  mrg 					  __A, __D);
   1246  1.1  mrg }
   1247  1.1  mrg 
   1248  1.1  mrg #else
   1249  1.1  mrg #define _mm_max_round_sh(A, B, C)			\
   1250  1.1  mrg   (__builtin_ia32_maxsh_mask_round ((A), (B),		\
   1251  1.1  mrg 				    _mm_setzero_ph (),	\
   1252  1.1  mrg 				    (__mmask8)-1, (C)))
   1253  1.1  mrg 
   1254  1.1  mrg #define _mm_mask_max_round_sh(A, B, C, D, E)			\
   1255  1.1  mrg   (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
   1256  1.1  mrg 
   1257  1.1  mrg #define _mm_maskz_max_round_sh(A, B, C, D)		\
   1258  1.1  mrg   (__builtin_ia32_maxsh_mask_round ((B), (C),		\
   1259  1.1  mrg 				    _mm_setzero_ph (),	\
   1260  1.1  mrg 				    (A), (D)))
   1261  1.1  mrg 
   1262  1.1  mrg #define _mm_min_round_sh(A, B, C)			\
   1263  1.1  mrg   (__builtin_ia32_minsh_mask_round ((A), (B),		\
   1264  1.1  mrg 				    _mm_setzero_ph (),	\
   1265  1.1  mrg 				    (__mmask8)-1, (C)))
   1266  1.1  mrg 
   1267  1.1  mrg #define _mm_mask_min_round_sh(A, B, C, D, E)			\
   1268  1.1  mrg   (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
   1269  1.1  mrg 
   1270  1.1  mrg #define _mm_maskz_min_round_sh(A, B, C, D)		\
   1271  1.1  mrg   (__builtin_ia32_minsh_mask_round ((B), (C),		\
   1272  1.1  mrg 				    _mm_setzero_ph (),	\
   1273  1.1  mrg 				    (A), (D)))
   1274  1.1  mrg 
   1275  1.1  mrg #endif /* __OPTIMIZE__ */
   1276  1.1  mrg 
    1277  1.1  mrg /* Intrinsics vcmpph.  */
    1278  1.1  mrg #ifdef __OPTIMIZE__
   1279  1.1  mrg extern __inline __mmask32
   1280  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1281  1.1  mrg _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
   1282  1.1  mrg {
   1283  1.1  mrg   return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
   1284  1.1  mrg 						   (__mmask32) -1);
   1285  1.1  mrg }
   1286  1.1  mrg 
   1287  1.1  mrg extern __inline __mmask32
   1288  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1289  1.1  mrg _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
   1290  1.1  mrg 			 const int __D)
   1291  1.1  mrg {
   1292  1.1  mrg   return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
   1293  1.1  mrg 						   __A);
   1294  1.1  mrg }
   1295  1.1  mrg 
   1296  1.1  mrg extern __inline __mmask32
   1297  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1298  1.1  mrg _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
   1299  1.1  mrg 			  const int __D)
   1300  1.1  mrg {
   1301  1.1  mrg   return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
   1302  1.1  mrg 							 __C, (__mmask32) -1,
   1303  1.1  mrg 							 __D);
   1304  1.1  mrg }
   1305  1.1  mrg 
   1306  1.1  mrg extern __inline __mmask32
   1307  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1308  1.1  mrg _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
   1309  1.1  mrg 			       const int __D, const int __E)
   1310  1.1  mrg {
   1311  1.1  mrg   return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
   1312  1.1  mrg 							 __D, __A,
   1313  1.1  mrg 							 __E);
   1314  1.1  mrg }
   1315  1.1  mrg 
   1316  1.1  mrg #else
   1317  1.1  mrg #define _mm512_cmp_ph_mask(A, B, C)			\
   1318  1.1  mrg   (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
   1319  1.1  mrg 
   1320  1.1  mrg #define _mm512_mask_cmp_ph_mask(A, B, C, D)		\
   1321  1.1  mrg   (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
   1322  1.1  mrg 
   1323  1.1  mrg #define _mm512_cmp_round_ph_mask(A, B, C, D)				\
   1324  1.1  mrg   (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
   1325  1.1  mrg 
   1326  1.1  mrg #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)			\
   1327  1.1  mrg   (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
   1328  1.1  mrg 
   1329  1.1  mrg #endif /* __OPTIMIZE__ */
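
/* Example (illustrative sketch): for __m512h values a and b, count the
   lanes of a that compare strictly less than the corresponding lane of b,
   using the ordered less-than predicate from <immintrin.h>:

     __mmask32 lt = _mm512_cmp_ph_mask (a, b, _CMP_LT_OS);
     int nlt = __builtin_popcount ((unsigned int) lt);
*/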
   1330  1.1  mrg 
   1331  1.1  mrg /* Intrinsics vcmpsh.  */
   1332  1.1  mrg #ifdef __OPTIMIZE__
   1333  1.1  mrg extern __inline __mmask8
   1334  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1335  1.1  mrg _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
   1336  1.1  mrg {
   1337  1.1  mrg   return (__mmask8)
   1338  1.1  mrg     __builtin_ia32_cmpsh_mask_round (__A, __B,
   1339  1.1  mrg 				     __C, (__mmask8) -1,
   1340  1.1  mrg 				     _MM_FROUND_CUR_DIRECTION);
   1341  1.1  mrg }
   1342  1.1  mrg 
   1343  1.1  mrg extern __inline __mmask8
   1344  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1345  1.1  mrg _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
   1346  1.1  mrg 		      const int __D)
   1347  1.1  mrg {
   1348  1.1  mrg   return (__mmask8)
   1349  1.1  mrg     __builtin_ia32_cmpsh_mask_round (__B, __C,
   1350  1.1  mrg 				     __D, __A,
   1351  1.1  mrg 				     _MM_FROUND_CUR_DIRECTION);
   1352  1.1  mrg }
   1353  1.1  mrg 
   1354  1.1  mrg extern __inline __mmask8
   1355  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1356  1.1  mrg _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
   1357  1.1  mrg 		       const int __D)
   1358  1.1  mrg {
   1359  1.1  mrg   return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
   1360  1.1  mrg 						     __C, (__mmask8) -1,
   1361  1.1  mrg 						     __D);
   1362  1.1  mrg }
   1363  1.1  mrg 
   1364  1.1  mrg extern __inline __mmask8
   1365  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1366  1.1  mrg _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
   1367  1.1  mrg 			    const int __D, const int __E)
   1368  1.1  mrg {
   1369  1.1  mrg   return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
   1370  1.1  mrg 						     __D, __A,
   1371  1.1  mrg 						     __E);
   1372  1.1  mrg }
   1373  1.1  mrg 
   1374  1.1  mrg #else
   1375  1.1  mrg #define _mm_cmp_sh_mask(A, B, C)					\
   1376  1.1  mrg   (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1),		\
   1377  1.1  mrg 				    (_MM_FROUND_CUR_DIRECTION)))
   1378  1.1  mrg 
   1379  1.1  mrg #define _mm_mask_cmp_sh_mask(A, B, C, D)				\
   1380  1.1  mrg   (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A),			\
   1381  1.1  mrg 				    (_MM_FROUND_CUR_DIRECTION)))
   1382  1.1  mrg 
   1383  1.1  mrg #define _mm_cmp_round_sh_mask(A, B, C, D)			\
   1384  1.1  mrg   (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
   1385  1.1  mrg 
   1386  1.1  mrg #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)		\
   1387  1.1  mrg   (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
   1388  1.1  mrg 
   1389  1.1  mrg #endif /* __OPTIMIZE__ */
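
/* Example (illustrative sketch): test the lowest elements of two __m128h
   values x and y for ordered, quiet equality; only bit 0 of the returned
   mask is meaningful:

     int eq = (int) (_mm_cmp_sh_mask (x, y, _CMP_EQ_OQ) & 1);
*/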
   1390  1.1  mrg 
   1391  1.1  mrg /* Intrinsics vcomish.  */
   1392  1.1  mrg extern __inline int
   1393  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1394  1.1  mrg _mm_comieq_sh (__m128h __A, __m128h __B)
   1395  1.1  mrg {
   1396  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
   1397  1.1  mrg 					  (__mmask8) -1,
   1398  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1399  1.1  mrg }
   1400  1.1  mrg 
   1401  1.1  mrg extern __inline int
   1402  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1403  1.1  mrg _mm_comilt_sh (__m128h __A, __m128h __B)
   1404  1.1  mrg {
   1405  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
   1406  1.1  mrg 					  (__mmask8) -1,
   1407  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1408  1.1  mrg }
   1409  1.1  mrg 
   1410  1.1  mrg extern __inline int
   1411  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1412  1.1  mrg _mm_comile_sh (__m128h __A, __m128h __B)
   1413  1.1  mrg {
   1414  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
   1415  1.1  mrg 					  (__mmask8) -1,
   1416  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1417  1.1  mrg }
   1418  1.1  mrg 
   1419  1.1  mrg extern __inline int
   1420  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1421  1.1  mrg _mm_comigt_sh (__m128h __A, __m128h __B)
   1422  1.1  mrg {
   1423  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
   1424  1.1  mrg 					  (__mmask8) -1,
   1425  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1426  1.1  mrg }
   1427  1.1  mrg 
   1428  1.1  mrg extern __inline int
   1429  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1430  1.1  mrg _mm_comige_sh (__m128h __A, __m128h __B)
   1431  1.1  mrg {
   1432  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
   1433  1.1  mrg 					  (__mmask8) -1,
   1434  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1435  1.1  mrg }
   1436  1.1  mrg 
   1437  1.1  mrg extern __inline int
   1438  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1439  1.1  mrg _mm_comineq_sh (__m128h __A, __m128h __B)
   1440  1.1  mrg {
   1441  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
   1442  1.1  mrg 					  (__mmask8) -1,
   1443  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1444  1.1  mrg }
   1445  1.1  mrg 
   1446  1.1  mrg extern __inline int
   1447  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1448  1.1  mrg _mm_ucomieq_sh (__m128h __A, __m128h __B)
   1449  1.1  mrg {
   1450  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
   1451  1.1  mrg 					  (__mmask8) -1,
   1452  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1453  1.1  mrg }
   1454  1.1  mrg 
   1455  1.1  mrg extern __inline int
   1456  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1457  1.1  mrg _mm_ucomilt_sh (__m128h __A, __m128h __B)
   1458  1.1  mrg {
   1459  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
   1460  1.1  mrg 					  (__mmask8) -1,
   1461  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1462  1.1  mrg }
   1463  1.1  mrg 
   1464  1.1  mrg extern __inline int
   1465  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1466  1.1  mrg _mm_ucomile_sh (__m128h __A, __m128h __B)
   1467  1.1  mrg {
   1468  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
   1469  1.1  mrg 					  (__mmask8) -1,
   1470  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1471  1.1  mrg }
   1472  1.1  mrg 
   1473  1.1  mrg extern __inline int
   1474  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1475  1.1  mrg _mm_ucomigt_sh (__m128h __A, __m128h __B)
   1476  1.1  mrg {
   1477  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
   1478  1.1  mrg 					  (__mmask8) -1,
   1479  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1480  1.1  mrg }
   1481  1.1  mrg 
   1482  1.1  mrg extern __inline int
   1483  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1484  1.1  mrg _mm_ucomige_sh (__m128h __A, __m128h __B)
   1485  1.1  mrg {
   1486  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
   1487  1.1  mrg 					  (__mmask8) -1,
   1488  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1489  1.1  mrg }
   1490  1.1  mrg 
   1491  1.1  mrg extern __inline int
   1492  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1493  1.1  mrg _mm_ucomineq_sh (__m128h __A, __m128h __B)
   1494  1.1  mrg {
   1495  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
   1496  1.1  mrg 					  (__mmask8) -1,
   1497  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1498  1.1  mrg }
   1499  1.1  mrg 
   1500  1.1  mrg #ifdef __OPTIMIZE__
   1501  1.1  mrg extern __inline int
   1502  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1503  1.1  mrg _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
   1504  1.1  mrg {
   1505  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
   1506  1.1  mrg 					  (__mmask8) -1,
   1507  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   1508  1.1  mrg }
   1509  1.1  mrg 
   1510  1.1  mrg extern __inline int
   1511  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1512  1.1  mrg _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
   1513  1.1  mrg {
   1514  1.1  mrg   return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
    1515  1.1  mrg 					  (__mmask8) -1, __R);
   1516  1.1  mrg }
   1517  1.1  mrg 
   1518  1.1  mrg #else
   1519  1.1  mrg #define _mm_comi_round_sh(A, B, P, R)					\
   1520  1.1  mrg   (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
   1521  1.1  mrg #define _mm_comi_sh(A, B, P)						\
   1522  1.1  mrg   (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1),	\
   1523  1.1  mrg 				    _MM_FROUND_CUR_DIRECTION))
   1524  1.1  mrg 
   1525  1.1  mrg #endif /* __OPTIMIZE__  */
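
/* Example (illustrative sketch): the _mm_comi*_sh forms above use the
   ordered, signalling predicates while the _mm_ucomi*_sh forms use the
   quiet ones, so the latter are preferable when quiet NaN operands are
   expected.  For __m128h values x and limit:

     int below = _mm_ucomilt_sh (x, limit);   // 1 iff x[0] < limit[0]
*/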
   1526  1.1  mrg 
   1527  1.1  mrg /* Intrinsics vsqrtph.  */
   1528  1.1  mrg extern __inline __m512h
   1529  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1530  1.1  mrg _mm512_sqrt_ph (__m512h __A)
   1531  1.1  mrg {
   1532  1.1  mrg   return __builtin_ia32_sqrtph512_mask_round (__A,
    1533  1.1  mrg 					      _mm512_setzero_ph (),
   1534  1.1  mrg 					      (__mmask32) -1,
   1535  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   1536  1.1  mrg }
   1537  1.1  mrg 
   1538  1.1  mrg extern __inline __m512h
   1539  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1540  1.1  mrg _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
   1541  1.1  mrg {
   1542  1.1  mrg   return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
   1543  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   1544  1.1  mrg }
   1545  1.1  mrg 
   1546  1.1  mrg extern __inline __m512h
   1547  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1548  1.1  mrg _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
   1549  1.1  mrg {
   1550  1.1  mrg   return __builtin_ia32_sqrtph512_mask_round (__B,
   1551  1.1  mrg 					      _mm512_setzero_ph (),
   1552  1.1  mrg 					      __A,
   1553  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   1554  1.1  mrg }
   1555  1.1  mrg 
   1556  1.1  mrg #ifdef __OPTIMIZE__
   1557  1.1  mrg extern __inline __m512h
   1558  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1559  1.1  mrg _mm512_sqrt_round_ph (__m512h __A, const int __B)
   1560  1.1  mrg {
   1561  1.1  mrg   return __builtin_ia32_sqrtph512_mask_round (__A,
    1562  1.1  mrg 					      _mm512_setzero_ph (),
   1563  1.1  mrg 					      (__mmask32) -1, __B);
   1564  1.1  mrg }
   1565  1.1  mrg 
   1566  1.1  mrg extern __inline __m512h
   1567  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1568  1.1  mrg _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
   1569  1.1  mrg 			   const int __D)
   1570  1.1  mrg {
   1571  1.1  mrg   return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
   1572  1.1  mrg }
   1573  1.1  mrg 
   1574  1.1  mrg extern __inline __m512h
   1575  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1576  1.1  mrg _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
   1577  1.1  mrg {
   1578  1.1  mrg   return __builtin_ia32_sqrtph512_mask_round (__B,
   1579  1.1  mrg 					      _mm512_setzero_ph (),
   1580  1.1  mrg 					      __A, __C);
   1581  1.1  mrg }
   1582  1.1  mrg 
   1583  1.1  mrg #else
   1584  1.1  mrg #define _mm512_sqrt_round_ph(A, B)				\
   1585  1.1  mrg   (__builtin_ia32_sqrtph512_mask_round ((A),			\
   1586  1.1  mrg 					_mm512_setzero_ph (),	\
   1587  1.1  mrg 					(__mmask32)-1, (B)))
   1588  1.1  mrg 
   1589  1.1  mrg #define _mm512_mask_sqrt_round_ph(A, B, C, D)			\
   1590  1.1  mrg   (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
   1591  1.1  mrg 
   1592  1.1  mrg #define _mm512_maskz_sqrt_round_ph(A, B, C)			\
   1593  1.1  mrg   (__builtin_ia32_sqrtph512_mask_round ((B),			\
   1594  1.1  mrg 					_mm512_setzero_ph (),	\
   1595  1.1  mrg 					(A), (C)))
   1596  1.1  mrg 
   1597  1.1  mrg #endif /* __OPTIMIZE__ */
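
/* Example (illustrative sketch): take the square root of only the
   non-negative lanes of a __m512h value v, zeroing the other lanes, by
   combining vcmpph with the zero-masked vsqrtph form above:

     __mmask32 nonneg = _mm512_cmp_ph_mask (v, _mm512_setzero_ph (),
					     _CMP_GE_OS);
     __m512h root = _mm512_maskz_sqrt_ph (nonneg, v);
*/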
   1598  1.1  mrg 
   1599  1.1  mrg /* Intrinsics vrsqrtph.  */
   1600  1.1  mrg extern __inline __m512h
   1601  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1602  1.1  mrg _mm512_rsqrt_ph (__m512h __A)
   1603  1.1  mrg {
   1604  1.1  mrg   return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
   1605  1.1  mrg 					 (__mmask32) -1);
   1606  1.1  mrg }
   1607  1.1  mrg 
   1608  1.1  mrg extern __inline __m512h
   1609  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1610  1.1  mrg _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
   1611  1.1  mrg {
   1612  1.1  mrg   return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
   1613  1.1  mrg }
   1614  1.1  mrg 
   1615  1.1  mrg extern __inline __m512h
   1616  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1617  1.1  mrg _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
   1618  1.1  mrg {
   1619  1.1  mrg   return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
   1620  1.1  mrg 					 __A);
   1621  1.1  mrg }
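
/* Example (illustrative sketch): vrsqrtph yields an approximation of
   1/sqrt(x) per lane, not a correctly rounded result; for a __m512h x:

     __m512h inv_root = _mm512_rsqrt_ph (x);
*/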
   1622  1.1  mrg 
   1623  1.1  mrg /* Intrinsics vrsqrtsh.  */
   1624  1.1  mrg extern __inline __m128h
   1625  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1626  1.1  mrg _mm_rsqrt_sh (__m128h __A, __m128h __B)
   1627  1.1  mrg {
   1628  1.1  mrg   return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
   1629  1.1  mrg 				      (__mmask8) -1);
   1630  1.1  mrg }
   1631  1.1  mrg 
   1632  1.1  mrg extern __inline __m128h
   1633  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1634  1.1  mrg _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   1635  1.1  mrg {
   1636  1.1  mrg   return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
   1637  1.1  mrg }
   1638  1.1  mrg 
   1639  1.1  mrg extern __inline __m128h
   1640  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1641  1.1  mrg _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
   1642  1.1  mrg {
   1643  1.1  mrg   return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
   1644  1.1  mrg 				      __A);
   1645  1.1  mrg }
   1646  1.1  mrg 
   1647  1.1  mrg /* Intrinsics vsqrtsh.  */
   1648  1.1  mrg extern __inline __m128h
   1649  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1650  1.1  mrg _mm_sqrt_sh (__m128h __A, __m128h __B)
   1651  1.1  mrg {
   1652  1.1  mrg   return __builtin_ia32_sqrtsh_mask_round (__B, __A,
   1653  1.1  mrg 					   _mm_setzero_ph (),
   1654  1.1  mrg 					   (__mmask8) -1,
   1655  1.1  mrg 					   _MM_FROUND_CUR_DIRECTION);
   1656  1.1  mrg }
   1657  1.1  mrg 
   1658  1.1  mrg extern __inline __m128h
   1659  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1660  1.1  mrg _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   1661  1.1  mrg {
   1662  1.1  mrg   return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
   1663  1.1  mrg 					   _MM_FROUND_CUR_DIRECTION);
   1664  1.1  mrg }
   1665  1.1  mrg 
   1666  1.1  mrg extern __inline __m128h
   1667  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1668  1.1  mrg _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
   1669  1.1  mrg {
   1670  1.1  mrg   return __builtin_ia32_sqrtsh_mask_round (__C, __B,
   1671  1.1  mrg 					   _mm_setzero_ph (),
   1672  1.1  mrg 					   __A, _MM_FROUND_CUR_DIRECTION);
   1673  1.1  mrg }
   1674  1.1  mrg 
   1675  1.1  mrg #ifdef __OPTIMIZE__
   1676  1.1  mrg extern __inline __m128h
   1677  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1678  1.1  mrg _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
   1679  1.1  mrg {
   1680  1.1  mrg   return __builtin_ia32_sqrtsh_mask_round (__B, __A,
   1681  1.1  mrg 					   _mm_setzero_ph (),
   1682  1.1  mrg 					   (__mmask8) -1, __C);
   1683  1.1  mrg }
   1684  1.1  mrg 
   1685  1.1  mrg extern __inline __m128h
   1686  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1687  1.1  mrg _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
   1688  1.1  mrg 			__m128h __D, const int __E)
   1689  1.1  mrg {
   1690  1.1  mrg   return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
   1691  1.1  mrg 					   __E);
   1692  1.1  mrg }
   1693  1.1  mrg 
   1694  1.1  mrg extern __inline __m128h
   1695  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1696  1.1  mrg _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
   1697  1.1  mrg 			 const int __D)
   1698  1.1  mrg {
   1699  1.1  mrg   return __builtin_ia32_sqrtsh_mask_round (__C, __B,
   1700  1.1  mrg 					   _mm_setzero_ph (),
   1701  1.1  mrg 					   __A, __D);
   1702  1.1  mrg }
   1703  1.1  mrg 
   1704  1.1  mrg #else
   1705  1.1  mrg #define _mm_sqrt_round_sh(A, B, C)				\
   1706  1.1  mrg   (__builtin_ia32_sqrtsh_mask_round ((B), (A),			\
   1707  1.1  mrg 				     _mm_setzero_ph (),		\
   1708  1.1  mrg 				     (__mmask8)-1, (C)))
   1709  1.1  mrg 
   1710  1.1  mrg #define _mm_mask_sqrt_round_sh(A, B, C, D, E)			\
   1711  1.1  mrg   (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
   1712  1.1  mrg 
   1713  1.1  mrg #define _mm_maskz_sqrt_round_sh(A, B, C, D)		\
   1714  1.1  mrg   (__builtin_ia32_sqrtsh_mask_round ((C), (B),		\
   1715  1.1  mrg 				     _mm_setzero_ph (),	\
   1716  1.1  mrg 				     (A), (D)))
   1717  1.1  mrg 
   1718  1.1  mrg #endif /* __OPTIMIZE__ */
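
/* Example (illustrative sketch): the scalar square root merges its operands;
   for __m128h values a and b the result takes sqrt(b[0]) in the low element
   and a[1..7] in the upper elements:

     __m128h r = _mm_sqrt_sh (a, b);
*/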
   1719  1.1  mrg 
   1720  1.1  mrg /* Intrinsics vrcpph.  */
   1721  1.1  mrg extern __inline __m512h
   1722  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1723  1.1  mrg _mm512_rcp_ph (__m512h __A)
   1724  1.1  mrg {
   1725  1.1  mrg   return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
   1726  1.1  mrg 				       (__mmask32) -1);
   1727  1.1  mrg }
   1728  1.1  mrg 
   1729  1.1  mrg extern __inline __m512h
   1730  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1731  1.1  mrg _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
   1732  1.1  mrg {
   1733  1.1  mrg   return __builtin_ia32_rcpph512_mask (__C, __A, __B);
   1734  1.1  mrg }
   1735  1.1  mrg 
   1736  1.1  mrg extern __inline __m512h
   1737  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1738  1.1  mrg _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
   1739  1.1  mrg {
   1740  1.1  mrg   return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
   1741  1.1  mrg 				       __A);
   1742  1.1  mrg }
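
/* Example (illustrative sketch): vrcpph yields an approximate reciprocal
   per lane rather than a correctly rounded quotient; for a __m512h d:

     __m512h approx_inv = _mm512_rcp_ph (d);
*/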
   1743  1.1  mrg 
   1744  1.1  mrg /* Intrinsics vrcpsh.  */
   1745  1.1  mrg extern __inline __m128h
   1746  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1747  1.1  mrg _mm_rcp_sh (__m128h __A, __m128h __B)
   1748  1.1  mrg {
   1749  1.1  mrg   return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
   1750  1.1  mrg 				    (__mmask8) -1);
   1751  1.1  mrg }
   1752  1.1  mrg 
   1753  1.1  mrg extern __inline __m128h
   1754  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1755  1.1  mrg _mm_mask_rcp_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   1756  1.1  mrg {
   1757  1.1  mrg   return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
   1758  1.1  mrg }
   1759  1.1  mrg 
   1760  1.1  mrg extern __inline __m128h
   1761  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1762  1.1  mrg _mm_maskz_rcp_sh (__mmask8 __A, __m128h __B, __m128h __C)
   1763  1.1  mrg {
   1764  1.1  mrg   return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
   1765  1.1  mrg 				    __A);
   1766  1.1  mrg }
   1767  1.1  mrg 
   1768  1.1  mrg /* Intrinsics vscalefph.  */
   1769  1.1  mrg extern __inline __m512h
   1770  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1771  1.1  mrg _mm512_scalef_ph (__m512h __A, __m512h __B)
   1772  1.1  mrg {
   1773  1.1  mrg   return __builtin_ia32_scalefph512_mask_round (__A, __B,
   1774  1.1  mrg 						_mm512_setzero_ph (),
   1775  1.1  mrg 						(__mmask32) -1,
   1776  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   1777  1.1  mrg }
   1778  1.1  mrg 
   1779  1.1  mrg extern __inline __m512h
   1780  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1781  1.1  mrg _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
   1782  1.1  mrg {
   1783  1.1  mrg   return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
   1784  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   1785  1.1  mrg }
   1786  1.1  mrg 
   1787  1.1  mrg extern __inline __m512h
   1788  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1789  1.1  mrg _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
   1790  1.1  mrg {
   1791  1.1  mrg   return __builtin_ia32_scalefph512_mask_round (__B, __C,
   1792  1.1  mrg 						_mm512_setzero_ph (),
   1793  1.1  mrg 						__A,
   1794  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   1795  1.1  mrg }
   1796  1.1  mrg 
   1797  1.1  mrg #ifdef __OPTIMIZE__
   1798  1.1  mrg extern __inline __m512h
   1799  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1800  1.1  mrg _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
   1801  1.1  mrg {
   1802  1.1  mrg   return __builtin_ia32_scalefph512_mask_round (__A, __B,
   1803  1.1  mrg 						_mm512_setzero_ph (),
   1804  1.1  mrg 						(__mmask32) -1, __C);
   1805  1.1  mrg }
   1806  1.1  mrg 
   1807  1.1  mrg extern __inline __m512h
   1808  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1809  1.1  mrg _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
   1810  1.1  mrg 			     __m512h __D, const int __E)
   1811  1.1  mrg {
   1812  1.1  mrg   return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
   1813  1.1  mrg 						__E);
   1814  1.1  mrg }
   1815  1.1  mrg 
   1816  1.1  mrg extern __inline __m512h
   1817  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1818  1.1  mrg _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
   1819  1.1  mrg 			      const int __D)
   1820  1.1  mrg {
   1821  1.1  mrg   return __builtin_ia32_scalefph512_mask_round (__B, __C,
   1822  1.1  mrg 						_mm512_setzero_ph (),
   1823  1.1  mrg 						__A, __D);
   1824  1.1  mrg }
   1825  1.1  mrg 
   1826  1.1  mrg #else
   1827  1.1  mrg #define _mm512_scalef_round_ph(A, B, C)				\
   1828  1.1  mrg   (__builtin_ia32_scalefph512_mask_round ((A), (B),		\
   1829  1.1  mrg 					  _mm512_setzero_ph (),	\
   1830  1.1  mrg 					  (__mmask32)-1, (C)))
   1831  1.1  mrg 
   1832  1.1  mrg #define _mm512_mask_scalef_round_ph(A, B, C, D, E)			\
   1833  1.1  mrg   (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
   1834  1.1  mrg 
   1835  1.1  mrg #define _mm512_maskz_scalef_round_ph(A, B, C, D)		\
   1836  1.1  mrg   (__builtin_ia32_scalefph512_mask_round ((B), (C),		\
   1837  1.1  mrg 					  _mm512_setzero_ph (),	\
   1838  1.1  mrg 					  (A), (D)))
   1839  1.1  mrg 
   1840  1.1  mrg #endif  /* __OPTIMIZE__ */
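
/* Example (illustrative sketch): vscalefph computes a * 2^floor(b) per
   lane, so multiplying every element of a __m512h x by 8 can be written
   (assuming the _mm512_set1_ph intrinsic defined earlier in this header):

     __m512h by_eight = _mm512_scalef_ph (x, _mm512_set1_ph ((_Float16) 3.0));
*/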
   1841  1.1  mrg 
   1842  1.1  mrg /* Intrinsics vscalefsh.  */
   1843  1.1  mrg extern __inline __m128h
   1844  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1845  1.1  mrg _mm_scalef_sh (__m128h __A, __m128h __B)
   1846  1.1  mrg {
   1847  1.1  mrg   return __builtin_ia32_scalefsh_mask_round (__A, __B,
   1848  1.1  mrg 					     _mm_setzero_ph (),
   1849  1.1  mrg 					     (__mmask8) -1,
   1850  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   1851  1.1  mrg }
   1852  1.1  mrg 
   1853  1.1  mrg extern __inline __m128h
   1854  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1855  1.1  mrg _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   1856  1.1  mrg {
   1857  1.1  mrg   return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
   1858  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   1859  1.1  mrg }
   1860  1.1  mrg 
   1861  1.1  mrg extern __inline __m128h
   1862  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1863  1.1  mrg _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
   1864  1.1  mrg {
   1865  1.1  mrg   return __builtin_ia32_scalefsh_mask_round (__B, __C,
   1866  1.1  mrg 					     _mm_setzero_ph (),
   1867  1.1  mrg 					     __A,
   1868  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   1869  1.1  mrg }
   1870  1.1  mrg 
   1871  1.1  mrg #ifdef __OPTIMIZE__
   1872  1.1  mrg extern __inline __m128h
   1873  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1874  1.1  mrg _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
   1875  1.1  mrg {
   1876  1.1  mrg   return __builtin_ia32_scalefsh_mask_round (__A, __B,
   1877  1.1  mrg 					     _mm_setzero_ph (),
   1878  1.1  mrg 					     (__mmask8) -1, __C);
   1879  1.1  mrg }
   1880  1.1  mrg 
   1881  1.1  mrg extern __inline __m128h
   1882  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1883  1.1  mrg _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
   1884  1.1  mrg 			  __m128h __D, const int __E)
   1885  1.1  mrg {
   1886  1.1  mrg   return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
   1887  1.1  mrg 					     __E);
   1888  1.1  mrg }
   1889  1.1  mrg 
   1890  1.1  mrg extern __inline __m128h
   1891  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1892  1.1  mrg _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
   1893  1.1  mrg 			   const int __D)
   1894  1.1  mrg {
   1895  1.1  mrg   return __builtin_ia32_scalefsh_mask_round (__B, __C,
   1896  1.1  mrg 					     _mm_setzero_ph (),
   1897  1.1  mrg 					     __A, __D);
   1898  1.1  mrg }
   1899  1.1  mrg 
   1900  1.1  mrg #else
   1901  1.1  mrg #define _mm_scalef_round_sh(A, B, C)				\
   1902  1.1  mrg   (__builtin_ia32_scalefsh_mask_round ((A), (B),		\
   1903  1.1  mrg 				       _mm_setzero_ph (),	\
   1904  1.1  mrg 				       (__mmask8)-1, (C)))
   1905  1.1  mrg 
   1906  1.1  mrg #define _mm_mask_scalef_round_sh(A, B, C, D, E)				\
   1907  1.1  mrg   (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
   1908  1.1  mrg 
   1909  1.1  mrg #define _mm_maskz_scalef_round_sh(A, B, C, D)				\
   1910  1.1  mrg   (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (),	\
   1911  1.1  mrg 				       (A), (D)))
   1912  1.1  mrg 
   1913  1.1  mrg #endif /* __OPTIMIZE__ */
   1914  1.1  mrg 
   1915  1.1  mrg /* Intrinsics vreduceph.  */
   1916  1.1  mrg #ifdef __OPTIMIZE__
   1917  1.1  mrg extern __inline __m512h
   1918  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1919  1.1  mrg _mm512_reduce_ph (__m512h __A, int __B)
   1920  1.1  mrg {
   1921  1.1  mrg   return __builtin_ia32_reduceph512_mask_round (__A, __B,
   1922  1.1  mrg 						_mm512_setzero_ph (),
   1923  1.1  mrg 						(__mmask32) -1,
   1924  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   1925  1.1  mrg }
   1926  1.1  mrg 
   1927  1.1  mrg extern __inline __m512h
   1928  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1929  1.1  mrg _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
   1930  1.1  mrg {
   1931  1.1  mrg   return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
   1932  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   1933  1.1  mrg }
   1934  1.1  mrg 
   1935  1.1  mrg extern __inline __m512h
   1936  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1937  1.1  mrg _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
   1938  1.1  mrg {
   1939  1.1  mrg   return __builtin_ia32_reduceph512_mask_round (__B, __C,
   1940  1.1  mrg 						_mm512_setzero_ph (),
   1941  1.1  mrg 						__A,
   1942  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   1943  1.1  mrg }
   1944  1.1  mrg 
   1945  1.1  mrg extern __inline __m512h
   1946  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1947  1.1  mrg _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
   1948  1.1  mrg {
   1949  1.1  mrg   return __builtin_ia32_reduceph512_mask_round (__A, __B,
   1950  1.1  mrg 						_mm512_setzero_ph (),
   1951  1.1  mrg 						(__mmask32) -1, __C);
   1952  1.1  mrg }
   1953  1.1  mrg 
   1954  1.1  mrg extern __inline __m512h
   1955  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1956  1.1  mrg _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
   1957  1.1  mrg 			     int __D, const int __E)
   1958  1.1  mrg {
   1959  1.1  mrg   return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
   1960  1.1  mrg 						__E);
   1961  1.1  mrg }
   1962  1.1  mrg 
   1963  1.1  mrg extern __inline __m512h
   1964  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   1965  1.1  mrg _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
   1966  1.1  mrg 			      const int __D)
   1967  1.1  mrg {
   1968  1.1  mrg   return __builtin_ia32_reduceph512_mask_round (__B, __C,
   1969  1.1  mrg 						_mm512_setzero_ph (),
   1970  1.1  mrg 						__A, __D);
   1971  1.1  mrg }
   1972  1.1  mrg 
   1973  1.1  mrg #else
   1974  1.1  mrg #define _mm512_reduce_ph(A, B)						\
   1975  1.1  mrg   (__builtin_ia32_reduceph512_mask_round ((A), (B),			\
   1976  1.1  mrg 					  _mm512_setzero_ph (),		\
   1977  1.1  mrg 					  (__mmask32)-1,		\
   1978  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION))
   1979  1.1  mrg 
   1980  1.1  mrg #define _mm512_mask_reduce_ph(A, B, C, D)				\
   1981  1.1  mrg   (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B),		\
   1982  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION))
   1983  1.1  mrg 
   1984  1.1  mrg #define _mm512_maskz_reduce_ph(A, B, C)					\
   1985  1.1  mrg   (__builtin_ia32_reduceph512_mask_round ((B), (C),			\
   1986  1.1  mrg 					  _mm512_setzero_ph (),		\
   1987  1.1  mrg 					  (A), _MM_FROUND_CUR_DIRECTION))
   1988  1.1  mrg 
   1989  1.1  mrg #define _mm512_reduce_round_ph(A, B, C)				\
   1990  1.1  mrg   (__builtin_ia32_reduceph512_mask_round ((A), (B),		\
   1991  1.1  mrg 					  _mm512_setzero_ph (),	\
   1992  1.1  mrg 					  (__mmask32)-1, (C)))
   1993  1.1  mrg 
   1994  1.1  mrg #define _mm512_mask_reduce_round_ph(A, B, C, D, E)			\
   1995  1.1  mrg   (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
   1996  1.1  mrg 
   1997  1.1  mrg #define _mm512_maskz_reduce_round_ph(A, B, C, D)		\
   1998  1.1  mrg   (__builtin_ia32_reduceph512_mask_round ((B), (C),		\
   1999  1.1  mrg 					  _mm512_setzero_ph (),	\
   2000  1.1  mrg 					  (A), (D)))
   2001  1.1  mrg 
   2002  1.1  mrg #endif /* __OPTIMIZE__ */
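
/* Example (illustrative sketch): assuming the usual vreduceph immediate
   encoding (bits 7:4 select the fixed-point scale M, bits 1:0 the rounding
   mode), M = 0 with truncation extracts the fractional part of each lane
   of a __m512h x:

     __m512h frac = _mm512_reduce_ph (x, 0x03);   // x - trunc(x) per lane
*/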
   2003  1.1  mrg 
   2004  1.1  mrg /* Intrinsics vreducesh.  */
   2005  1.1  mrg #ifdef __OPTIMIZE__
   2006  1.1  mrg extern __inline __m128h
   2007  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2008  1.1  mrg _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
   2009  1.1  mrg {
   2010  1.1  mrg   return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
   2011  1.1  mrg 					     _mm_setzero_ph (),
   2012  1.1  mrg 					     (__mmask8) -1,
   2013  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   2014  1.1  mrg }
   2015  1.1  mrg 
   2016  1.1  mrg extern __inline __m128h
   2017  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2018  1.1  mrg _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
   2019  1.1  mrg 		    __m128h __D, int __E)
   2020  1.1  mrg {
   2021  1.1  mrg   return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
   2022  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   2023  1.1  mrg }
   2024  1.1  mrg 
   2025  1.1  mrg extern __inline __m128h
   2026  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2027  1.1  mrg _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
   2028  1.1  mrg {
   2029  1.1  mrg   return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
   2030  1.1  mrg 					     _mm_setzero_ph (), __A,
   2031  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   2032  1.1  mrg }
   2033  1.1  mrg 
   2034  1.1  mrg extern __inline __m128h
   2035  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2036  1.1  mrg _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
   2037  1.1  mrg {
   2038  1.1  mrg   return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
   2039  1.1  mrg 					     _mm_setzero_ph (),
   2040  1.1  mrg 					     (__mmask8) -1, __D);
   2041  1.1  mrg }
   2042  1.1  mrg 
   2043  1.1  mrg extern __inline __m128h
   2044  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2045  1.1  mrg _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
   2046  1.1  mrg 			  __m128h __D, int __E, const int __F)
   2047  1.1  mrg {
   2048  1.1  mrg   return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
   2049  1.1  mrg 					     __B, __F);
   2050  1.1  mrg }
   2051  1.1  mrg 
   2052  1.1  mrg extern __inline __m128h
   2053  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2054  1.1  mrg _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
   2055  1.1  mrg 			   int __D, const int __E)
   2056  1.1  mrg {
   2057  1.1  mrg   return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
   2058  1.1  mrg 					     _mm_setzero_ph (),
   2059  1.1  mrg 					     __A, __E);
   2060  1.1  mrg }
   2061  1.1  mrg 
   2062  1.1  mrg #else
   2063  1.1  mrg #define _mm_reduce_sh(A, B, C)						\
   2064  1.1  mrg   (__builtin_ia32_reducesh_mask_round ((A), (B), (C),			\
   2065  1.1  mrg 				       _mm_setzero_ph (),		\
   2066  1.1  mrg 				       (__mmask8)-1,			\
   2067  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION))
   2068  1.1  mrg 
   2069  1.1  mrg #define _mm_mask_reduce_sh(A, B, C, D, E)				\
   2070  1.1  mrg   (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B),		\
   2071  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION))
   2072  1.1  mrg 
   2073  1.1  mrg #define _mm_maskz_reduce_sh(A, B, C, D)					\
   2074  1.1  mrg   (__builtin_ia32_reducesh_mask_round ((B), (C), (D),			\
   2075  1.1  mrg 				       _mm_setzero_ph (),		\
   2076  1.1  mrg 				       (A), _MM_FROUND_CUR_DIRECTION))
   2077  1.1  mrg 
   2078  1.1  mrg #define _mm_reduce_round_sh(A, B, C, D)				\
   2079  1.1  mrg   (__builtin_ia32_reducesh_mask_round ((A), (B), (C),		\
   2080  1.1  mrg 				       _mm_setzero_ph (),	\
   2081  1.1  mrg 				       (__mmask8)-1, (D)))
   2082  1.1  mrg 
   2083  1.1  mrg #define _mm_mask_reduce_round_sh(A, B, C, D, E, F)			\
   2084  1.1  mrg   (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
   2085  1.1  mrg 
   2086  1.1  mrg #define _mm_maskz_reduce_round_sh(A, B, C, D, E)		\
   2087  1.1  mrg   (__builtin_ia32_reducesh_mask_round ((B), (C), (D),		\
   2088  1.1  mrg 				       _mm_setzero_ph (),	\
   2089  1.1  mrg 				       (A), (E)))
   2090  1.1  mrg 
   2091  1.1  mrg #endif /* __OPTIMIZE__ */
   2092  1.1  mrg 
   2093  1.1  mrg /* Intrinsics vrndscaleph.  */
   2094  1.1  mrg #ifdef __OPTIMIZE__
   2095  1.1  mrg extern __inline __m512h
    2096  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2097  1.1  mrg _mm512_roundscale_ph (__m512h __A, int __B)
   2098  1.1  mrg {
   2099  1.1  mrg   return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
   2100  1.1  mrg 						  _mm512_setzero_ph (),
   2101  1.1  mrg 						  (__mmask32) -1,
   2102  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   2103  1.1  mrg }
   2104  1.1  mrg 
   2105  1.1  mrg extern __inline __m512h
   2106  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2107  1.1  mrg _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
   2108  1.1  mrg 			   __m512h __C, int __D)
   2109  1.1  mrg {
   2110  1.1  mrg   return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
   2111  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   2112  1.1  mrg }
   2113  1.1  mrg 
   2114  1.1  mrg extern __inline __m512h
   2115  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2116  1.1  mrg _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
   2117  1.1  mrg {
   2118  1.1  mrg   return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
   2119  1.1  mrg 						  _mm512_setzero_ph (),
   2120  1.1  mrg 						  __A,
   2121  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   2122  1.1  mrg }
   2123  1.1  mrg 
   2124  1.1  mrg extern __inline __m512h
   2125  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2126  1.1  mrg _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
   2127  1.1  mrg {
   2128  1.1  mrg   return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
   2129  1.1  mrg 						  _mm512_setzero_ph (),
   2130  1.1  mrg 						  (__mmask32) -1,
   2131  1.1  mrg 						  __C);
   2132  1.1  mrg }
   2133  1.1  mrg 
   2134  1.1  mrg extern __inline __m512h
   2135  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2136  1.1  mrg _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
   2137  1.1  mrg 				 __m512h __C, int __D, const int __E)
   2138  1.1  mrg {
   2139  1.1  mrg   return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
   2140  1.1  mrg 						  __B, __E);
   2141  1.1  mrg }
   2142  1.1  mrg 
   2143  1.1  mrg extern __inline __m512h
   2144  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2145  1.1  mrg _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
   2146  1.1  mrg 				  const int __D)
   2147  1.1  mrg {
   2148  1.1  mrg   return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
   2149  1.1  mrg 						  _mm512_setzero_ph (),
   2150  1.1  mrg 						  __A, __D);
   2151  1.1  mrg }
   2152  1.1  mrg 
   2153  1.1  mrg #else
   2154  1.1  mrg #define _mm512_roundscale_ph(A, B)					\
   2155  1.1  mrg   (__builtin_ia32_rndscaleph512_mask_round ((A), (B),			\
   2156  1.1  mrg 					    _mm512_setzero_ph (),	\
   2157  1.1  mrg 					    (__mmask32)-1,		\
   2158  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION))
   2159  1.1  mrg 
   2160  1.1  mrg #define _mm512_mask_roundscale_ph(A, B, C, D)				\
   2161  1.1  mrg   (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B),		\
   2162  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION))
   2163  1.1  mrg 
   2164  1.1  mrg #define _mm512_maskz_roundscale_ph(A, B, C)				\
   2165  1.1  mrg   (__builtin_ia32_rndscaleph512_mask_round ((B), (C),			\
   2166  1.1  mrg 					    _mm512_setzero_ph (),	\
   2167  1.1  mrg 					    (A),			\
   2168  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION))
   2169  1.1  mrg #define _mm512_roundscale_round_ph(A, B, C)				\
   2170  1.1  mrg   (__builtin_ia32_rndscaleph512_mask_round ((A), (B),			\
   2171  1.1  mrg 					    _mm512_setzero_ph (),	\
   2172  1.1  mrg 					    (__mmask32)-1, (C)))
   2173  1.1  mrg 
   2174  1.1  mrg #define _mm512_mask_roundscale_round_ph(A, B, C, D, E)			\
   2175  1.1  mrg   (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
   2176  1.1  mrg 
   2177  1.1  mrg #define _mm512_maskz_roundscale_round_ph(A, B, C, D)			\
   2178  1.1  mrg   (__builtin_ia32_rndscaleph512_mask_round ((B), (C),			\
   2179  1.1  mrg 					    _mm512_setzero_ph (),	\
   2180  1.1  mrg 					    (A), (D)))
   2181  1.1  mrg 
   2182  1.1  mrg #endif /* __OPTIMIZE__ */
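
/* Example (illustrative sketch): assuming the usual vrndscaleph immediate
   encoding (bits 7:4 select the scale M, bits 1:0 the rounding mode),
   M = 0 with round-to-nearest rounds every lane of a __m512h x to an
   integral value:

     __m512h rounded = _mm512_roundscale_ph (x, 0x00);
*/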
   2183  1.1  mrg 
   2184  1.1  mrg /* Intrinsics vrndscalesh.  */
   2185  1.1  mrg #ifdef __OPTIMIZE__
   2186  1.1  mrg extern __inline __m128h
    2187  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2188  1.1  mrg _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
   2189  1.1  mrg {
   2190  1.1  mrg   return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
   2191  1.1  mrg 					       _mm_setzero_ph (),
   2192  1.1  mrg 					       (__mmask8) -1,
   2193  1.1  mrg 					       _MM_FROUND_CUR_DIRECTION);
   2194  1.1  mrg }
   2195  1.1  mrg 
   2196  1.1  mrg extern __inline __m128h
   2197  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2198  1.1  mrg _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
   2199  1.1  mrg 			__m128h __D, int __E)
   2200  1.1  mrg {
   2201  1.1  mrg   return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
   2202  1.1  mrg 					       _MM_FROUND_CUR_DIRECTION);
   2203  1.1  mrg }
   2204  1.1  mrg 
   2205  1.1  mrg extern __inline __m128h
   2206  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2207  1.1  mrg _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
   2208  1.1  mrg {
   2209  1.1  mrg   return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
   2210  1.1  mrg 					       _mm_setzero_ph (), __A,
   2211  1.1  mrg 					       _MM_FROUND_CUR_DIRECTION);
   2212  1.1  mrg }
   2213  1.1  mrg 
   2214  1.1  mrg extern __inline __m128h
   2215  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2216  1.1  mrg _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
   2217  1.1  mrg {
   2218  1.1  mrg   return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
   2219  1.1  mrg 					       _mm_setzero_ph (),
   2220  1.1  mrg 					       (__mmask8) -1,
   2221  1.1  mrg 					       __D);
   2222  1.1  mrg }
   2223  1.1  mrg 
   2224  1.1  mrg extern __inline __m128h
   2225  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2226  1.1  mrg _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
   2227  1.1  mrg 			      __m128h __D, int __E, const int __F)
   2228  1.1  mrg {
   2229  1.1  mrg   return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
   2230  1.1  mrg 					       __A, __B, __F);
   2231  1.1  mrg }
   2232  1.1  mrg 
   2233  1.1  mrg extern __inline __m128h
   2234  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2235  1.1  mrg _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
   2236  1.1  mrg 			       int __D, const int __E)
   2237  1.1  mrg {
   2238  1.1  mrg   return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
   2239  1.1  mrg 					       _mm_setzero_ph (),
   2240  1.1  mrg 					       __A, __E);
   2241  1.1  mrg }
   2242  1.1  mrg 
   2243  1.1  mrg #else
   2244  1.1  mrg #define _mm_roundscale_sh(A, B, C)					\
   2245  1.1  mrg   (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),			\
   2246  1.1  mrg 					 _mm_setzero_ph (),		\
   2247  1.1  mrg 					 (__mmask8)-1,			\
   2248  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION))
   2249  1.1  mrg 
   2250  1.1  mrg #define _mm_mask_roundscale_sh(A, B, C, D, E)				\
   2251  1.1  mrg   (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B),	\
   2252  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION))
   2253  1.1  mrg 
   2254  1.1  mrg #define _mm_maskz_roundscale_sh(A, B, C, D)				\
   2255  1.1  mrg   (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),			\
   2256  1.1  mrg 					 _mm_setzero_ph (),		\
   2257  1.1  mrg 					 (A), _MM_FROUND_CUR_DIRECTION))
   2258  1.1  mrg 
   2259  1.1  mrg #define _mm_roundscale_round_sh(A, B, C, D)			\
   2260  1.1  mrg   (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),		\
   2261  1.1  mrg 					 _mm_setzero_ph (),	\
   2262  1.1  mrg 					 (__mmask8)-1, (D)))
   2263  1.1  mrg 
   2264  1.1  mrg #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F)			\
   2265  1.1  mrg   (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
   2266  1.1  mrg 
   2267  1.1  mrg #define _mm_maskz_roundscale_round_sh(A, B, C, D, E)		\
   2268  1.1  mrg   (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),		\
   2269  1.1  mrg 					 _mm_setzero_ph (),	\
   2270  1.1  mrg 					 (A), (E)))
   2271  1.1  mrg 
   2272  1.1  mrg #endif /* __OPTIMIZE__ */
   2273  1.1  mrg 
   2274  1.1  mrg /* Intrinsics vfpclasssh.  */
   2275  1.1  mrg #ifdef __OPTIMIZE__
   2276  1.1  mrg extern __inline __mmask8
    2277  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2278  1.1  mrg _mm_fpclass_sh_mask (__m128h __A, const int __imm)
   2279  1.1  mrg {
   2280  1.1  mrg   return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
   2281  1.1  mrg 						   (__mmask8) -1);
   2282  1.1  mrg }
   2283  1.1  mrg 
   2284  1.1  mrg extern __inline __mmask8
   2285  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2286  1.1  mrg _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
   2287  1.1  mrg {
   2288  1.1  mrg   return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
   2289  1.1  mrg }
   2290  1.1  mrg 
   2291  1.1  mrg #else
   2292  1.1  mrg #define _mm_fpclass_sh_mask(X, C)					\
   2293  1.1  mrg   ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X),	\
    2294  1.1  mrg 					     (int) (C), (__mmask8) (-1)))
   2295  1.1  mrg 
   2296  1.1  mrg #define _mm_mask_fpclass_sh_mask(U, X, C)				\
   2297  1.1  mrg   ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X),	\
   2298  1.1  mrg 					     (int) (C), (__mmask8) (U)))
   2299  1.1  mrg #endif /* __OPTIMIZE__ */
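
/* Example (illustrative sketch): vfpclasssh tests the low element against
   the categories selected by the immediate (assuming the usual encoding:
   bit 0 QNaN, bit 3 +Inf, bit 4 -Inf, bit 7 SNaN, ...), so 0x99 checks a
   __m128h x for NaN or infinity in its low element:

     int special = (int) (_mm_fpclass_sh_mask (x, 0x99) & 1);
*/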
   2300  1.1  mrg 
   2301  1.1  mrg /* Intrinsics vfpclassph.  */
   2302  1.1  mrg #ifdef __OPTIMIZE__
   2303  1.1  mrg extern __inline __mmask32
   2304  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2305  1.1  mrg _mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
   2306  1.1  mrg 			     const int __imm)
   2307  1.1  mrg {
   2308  1.1  mrg   return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
   2309  1.1  mrg 						       __imm, __U);
   2310  1.1  mrg }
   2311  1.1  mrg 
   2312  1.1  mrg extern __inline __mmask32
   2313  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2314  1.1  mrg _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
   2315  1.1  mrg {
   2316  1.1  mrg   return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
   2317  1.1  mrg 						       __imm,
   2318  1.1  mrg 						       (__mmask32) -1);
   2319  1.1  mrg }
   2320  1.1  mrg 
   2321  1.1  mrg #else
   2322  1.1  mrg #define _mm512_mask_fpclass_ph_mask(u, x, c)				\
   2323  1.1  mrg   ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
    2324  1.1  mrg 						 (int) (c), (__mmask32) (u)))
   2325  1.1  mrg 
   2326  1.1  mrg #define _mm512_fpclass_ph_mask(x, c)                                    \
   2327  1.1  mrg   ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
    2328  1.1  mrg 						 (int) (c), (__mmask32) -1))
    2329  1.1  mrg #endif /* __OPTIMIZE__ */
   2330  1.1  mrg 
   2331  1.1  mrg /* Intrinsics vgetexpph, vgetexpsh.  */
   2332  1.1  mrg extern __inline __m128h
   2333  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2334  1.1  mrg _mm_getexp_sh (__m128h __A, __m128h __B)
   2335  1.1  mrg {
   2336  1.1  mrg   return (__m128h)
   2337  1.1  mrg     __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
   2338  1.1  mrg 					(__v8hf) _mm_setzero_ph (),
   2339  1.1  mrg 					(__mmask8) -1,
   2340  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   2341  1.1  mrg }
   2342  1.1  mrg 
   2343  1.1  mrg extern __inline __m128h
   2344  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2345  1.1  mrg _mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
   2346  1.1  mrg {
   2347  1.1  mrg   return (__m128h)
   2348  1.1  mrg     __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
   2349  1.1  mrg 					(__v8hf) __W, (__mmask8) __U,
   2350  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   2351  1.1  mrg }
   2352  1.1  mrg 
   2353  1.1  mrg extern __inline __m128h
   2354  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2355  1.1  mrg _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
   2356  1.1  mrg {
   2357  1.1  mrg   return (__m128h)
   2358  1.1  mrg     __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
   2359  1.1  mrg 					(__v8hf) _mm_setzero_ph (),
   2360  1.1  mrg 					(__mmask8) __U,
   2361  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   2362  1.1  mrg }
   2363  1.1  mrg 
   2364  1.1  mrg extern __inline __m512h
   2365  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2366  1.1  mrg _mm512_getexp_ph (__m512h __A)
   2367  1.1  mrg {
   2368  1.1  mrg   return (__m512h)
   2369  1.1  mrg     __builtin_ia32_getexpph512_mask ((__v32hf) __A,
   2370  1.1  mrg 				     (__v32hf) _mm512_setzero_ph (),
   2371  1.1  mrg 				     (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
   2372  1.1  mrg }
   2373  1.1  mrg 
   2374  1.1  mrg extern __inline __m512h
   2375  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2376  1.1  mrg _mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
   2377  1.1  mrg {
   2378  1.1  mrg   return (__m512h)
   2379  1.1  mrg     __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
   2380  1.1  mrg 				     (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
   2381  1.1  mrg }
   2382  1.1  mrg 
   2383  1.1  mrg extern __inline __m512h
   2384  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2385  1.1  mrg _mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
   2386  1.1  mrg {
   2387  1.1  mrg   return (__m512h)
   2388  1.1  mrg     __builtin_ia32_getexpph512_mask ((__v32hf) __A,
   2389  1.1  mrg 				     (__v32hf) _mm512_setzero_ph (),
   2390  1.1  mrg 				     (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
   2391  1.1  mrg }
   2392  1.1  mrg 
   2393  1.1  mrg #ifdef __OPTIMIZE__
   2394  1.1  mrg extern __inline __m128h
   2395  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2396  1.1  mrg _mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
   2397  1.1  mrg {
   2398  1.1  mrg   return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
   2399  1.1  mrg 						       (__v8hf) __B,
   2400  1.1  mrg 						       _mm_setzero_ph (),
   2401  1.1  mrg 						       (__mmask8) -1,
   2402  1.1  mrg 						       __R);
   2403  1.1  mrg }
   2404  1.1  mrg 
   2405  1.1  mrg extern __inline __m128h
   2406  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2407  1.1  mrg _mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
   2408  1.1  mrg 			  __m128h __B, const int __R)
   2409  1.1  mrg {
   2410  1.1  mrg   return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
   2411  1.1  mrg 						       (__v8hf) __B,
   2412  1.1  mrg 						       (__v8hf) __W,
   2413  1.1  mrg 						       (__mmask8) __U, __R);
   2414  1.1  mrg }
   2415  1.1  mrg 
   2416  1.1  mrg extern __inline __m128h
   2417  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2418  1.1  mrg _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
   2419  1.1  mrg 			   const int __R)
   2420  1.1  mrg {
   2421  1.1  mrg   return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
   2422  1.1  mrg 						       (__v8hf) __B,
   2423  1.1  mrg 						       (__v8hf)
   2424  1.1  mrg 						       _mm_setzero_ph (),
   2425  1.1  mrg 						       (__mmask8) __U, __R);
   2426  1.1  mrg }
   2427  1.1  mrg 
   2428  1.1  mrg extern __inline __m512h
   2429  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2430  1.1  mrg _mm512_getexp_round_ph (__m512h __A, const int __R)
   2431  1.1  mrg {
   2432  1.1  mrg   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
   2433  1.1  mrg 						    (__v32hf)
   2434  1.1  mrg 						    _mm512_setzero_ph (),
   2435  1.1  mrg 						    (__mmask32) -1, __R);
   2436  1.1  mrg }
   2437  1.1  mrg 
   2438  1.1  mrg extern __inline __m512h
   2439  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2440  1.1  mrg _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
   2441  1.1  mrg 			     const int __R)
   2442  1.1  mrg {
   2443  1.1  mrg   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
   2444  1.1  mrg 						    (__v32hf) __W,
   2445  1.1  mrg 						    (__mmask32) __U, __R);
   2446  1.1  mrg }
   2447  1.1  mrg 
   2448  1.1  mrg extern __inline __m512h
   2449  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2450  1.1  mrg _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
   2451  1.1  mrg {
   2452  1.1  mrg   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
   2453  1.1  mrg 						    (__v32hf)
   2454  1.1  mrg 						    _mm512_setzero_ph (),
   2455  1.1  mrg 						    (__mmask32) __U, __R);
   2456  1.1  mrg }
   2457  1.1  mrg 
   2458  1.1  mrg #else
   2459  1.1  mrg #define _mm_getexp_round_sh(A, B, R)					\
   2460  1.1  mrg   ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),	\
   2461  1.1  mrg 					       (__v8hf)(__m128h)(B),	\
   2462  1.1  mrg 					       (__v8hf)_mm_setzero_ph(), \
   2463  1.1  mrg 					       (__mmask8)-1, R))
   2464  1.1  mrg 
   2465  1.1  mrg #define _mm_mask_getexp_round_sh(W, U, A, B, C)			\
   2466  1.1  mrg   (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
   2467  1.1  mrg 
   2468  1.1  mrg #define _mm_maskz_getexp_round_sh(U, A, B, C)				\
   2469  1.1  mrg   (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,			\
   2470  1.1  mrg 					      (__v8hf)_mm_setzero_ph(),	\
   2471  1.1  mrg 					      U, C)
   2472  1.1  mrg 
   2473  1.1  mrg #define _mm512_getexp_round_ph(A, R)					\
   2474  1.1  mrg   ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
   2475  1.1  mrg 					    (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
   2476  1.1  mrg 
   2477  1.1  mrg #define _mm512_mask_getexp_round_ph(W, U, A, R)				\
   2478  1.1  mrg   ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
   2479  1.1  mrg 					    (__v32hf)(__m512h)(W), (__mmask32)(U), R))
   2480  1.1  mrg 
   2481  1.1  mrg #define _mm512_maskz_getexp_round_ph(U, A, R)				\
   2482  1.1  mrg   ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
   2483  1.1  mrg 					    (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
   2484  1.1  mrg 
   2485  1.1  mrg #endif /* __OPTIMIZE__ */
   2486  1.1  mrg 
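/* Usage sketch (illustrative only, not part of this header): vgetexpph
   extracts the unbiased exponent of each half-precision lane as a
   _Float16 value, e.g. the lane holding 8.0 yields 3.0.  `exponents_of'
   and `v' are hypothetical names; assumes <immintrin.h> and AVX512FP16.

     static __m512h
     exponents_of (__m512h v)
     {
       return _mm512_getexp_ph (v);
     }
*/
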
   2487  1.1  mrg /* Intrinsics vgetmantph, vgetmantsh.  */
   2488  1.1  mrg #ifdef __OPTIMIZE__
   2489  1.1  mrg extern __inline __m128h
   2490  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2491  1.1  mrg _mm_getmant_sh (__m128h __A, __m128h __B,
   2492  1.1  mrg 		_MM_MANTISSA_NORM_ENUM __C,
   2493  1.1  mrg 		_MM_MANTISSA_SIGN_ENUM __D)
   2494  1.1  mrg {
   2495  1.1  mrg   return (__m128h)
   2496  1.1  mrg     __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
   2497  1.1  mrg 					 (__D << 2) | __C, _mm_setzero_ph (),
   2498  1.1  mrg 					 (__mmask8) -1,
   2499  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION);
   2500  1.1  mrg }
   2501  1.1  mrg 
   2502  1.1  mrg extern __inline __m128h
   2503  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2504  1.1  mrg _mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
   2505  1.1  mrg 		     __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
   2506  1.1  mrg 		     _MM_MANTISSA_SIGN_ENUM __D)
   2507  1.1  mrg {
   2508  1.1  mrg   return (__m128h)
   2509  1.1  mrg     __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
   2510  1.1  mrg 					 (__D << 2) | __C, (__v8hf) __W,
   2511  1.1  mrg 					 __U, _MM_FROUND_CUR_DIRECTION);
   2512  1.1  mrg }
   2513  1.1  mrg 
   2514  1.1  mrg extern __inline __m128h
   2515  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2516  1.1  mrg _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
   2517  1.1  mrg 		      _MM_MANTISSA_NORM_ENUM __C,
   2518  1.1  mrg 		      _MM_MANTISSA_SIGN_ENUM __D)
   2519  1.1  mrg {
   2520  1.1  mrg   return (__m128h)
   2521  1.1  mrg     __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
   2522  1.1  mrg 					 (__D << 2) | __C,
   2523  1.1  mrg 					 (__v8hf) _mm_setzero_ph(),
   2524  1.1  mrg 					 __U, _MM_FROUND_CUR_DIRECTION);
   2525  1.1  mrg }
   2526  1.1  mrg 
   2527  1.1  mrg extern __inline __m512h
   2528  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2529  1.1  mrg _mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
   2530  1.1  mrg 		   _MM_MANTISSA_SIGN_ENUM __C)
   2531  1.1  mrg {
   2532  1.1  mrg   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
   2533  1.1  mrg 						     (__C << 2) | __B,
   2534  1.1  mrg 						     _mm512_setzero_ph (),
   2535  1.1  mrg 						     (__mmask32) -1,
   2536  1.1  mrg 						     _MM_FROUND_CUR_DIRECTION);
   2537  1.1  mrg }
   2538  1.1  mrg 
   2539  1.1  mrg extern __inline __m512h
   2540  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2541  1.1  mrg _mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
   2542  1.1  mrg 			_MM_MANTISSA_NORM_ENUM __B,
   2543  1.1  mrg 			_MM_MANTISSA_SIGN_ENUM __C)
   2544  1.1  mrg {
   2545  1.1  mrg   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
   2546  1.1  mrg 						     (__C << 2) | __B,
   2547  1.1  mrg 						     (__v32hf) __W, __U,
   2548  1.1  mrg 						     _MM_FROUND_CUR_DIRECTION);
   2549  1.1  mrg }
   2550  1.1  mrg 
   2551  1.1  mrg extern __inline __m512h
   2552  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2553  1.1  mrg _mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
   2554  1.1  mrg 			 _MM_MANTISSA_NORM_ENUM __B,
   2555  1.1  mrg 			 _MM_MANTISSA_SIGN_ENUM __C)
   2556  1.1  mrg {
   2557  1.1  mrg   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
   2558  1.1  mrg 						     (__C << 2) | __B,
   2559  1.1  mrg 						     (__v32hf)
   2560  1.1  mrg 						     _mm512_setzero_ph (),
   2561  1.1  mrg 						     __U,
   2562  1.1  mrg 						     _MM_FROUND_CUR_DIRECTION);
   2563  1.1  mrg }
   2564  1.1  mrg 
   2565  1.1  mrg extern __inline __m128h
   2566  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2567  1.1  mrg _mm_getmant_round_sh (__m128h __A, __m128h __B,
   2568  1.1  mrg 		      _MM_MANTISSA_NORM_ENUM __C,
   2569  1.1  mrg 		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
   2570  1.1  mrg {
   2571  1.1  mrg   return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
   2572  1.1  mrg 							(__v8hf) __B,
   2573  1.1  mrg 							(__D << 2) | __C,
   2574  1.1  mrg 							_mm_setzero_ph (),
   2575  1.1  mrg 							(__mmask8) -1,
   2576  1.1  mrg 							__R);
   2577  1.1  mrg }
   2578  1.1  mrg 
   2579  1.1  mrg extern __inline __m128h
   2580  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2581  1.1  mrg _mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
   2582  1.1  mrg 			   __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
   2583  1.1  mrg 			   _MM_MANTISSA_SIGN_ENUM __D, const int __R)
   2584  1.1  mrg {
   2585  1.1  mrg   return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
   2586  1.1  mrg 							(__v8hf) __B,
   2587  1.1  mrg 							(__D << 2) | __C,
   2588  1.1  mrg 							(__v8hf) __W,
   2589  1.1  mrg 							__U, __R);
   2590  1.1  mrg }
   2591  1.1  mrg 
   2592  1.1  mrg extern __inline __m128h
   2593  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2594  1.1  mrg _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
   2595  1.1  mrg 			    _MM_MANTISSA_NORM_ENUM __C,
   2596  1.1  mrg 			    _MM_MANTISSA_SIGN_ENUM __D, const int __R)
   2597  1.1  mrg {
   2598  1.1  mrg   return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
   2599  1.1  mrg 							(__v8hf) __B,
   2600  1.1  mrg 							(__D << 2) | __C,
   2601  1.1  mrg 							(__v8hf)
   2602  1.1  mrg 							_mm_setzero_ph(),
   2603  1.1  mrg 							__U, __R);
   2604  1.1  mrg }
   2605  1.1  mrg 
   2606  1.1  mrg extern __inline __m512h
   2607  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2608  1.1  mrg _mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
   2609  1.1  mrg 			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   2610  1.1  mrg {
   2611  1.1  mrg   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
   2612  1.1  mrg 						     (__C << 2) | __B,
   2613  1.1  mrg 						     _mm512_setzero_ph (),
   2614  1.1  mrg 						     (__mmask32) -1, __R);
   2615  1.1  mrg }
   2616  1.1  mrg 
   2617  1.1  mrg extern __inline __m512h
   2618  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2619  1.1  mrg _mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
   2620  1.1  mrg 			      _MM_MANTISSA_NORM_ENUM __B,
   2621  1.1  mrg 			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   2622  1.1  mrg {
   2623  1.1  mrg   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
   2624  1.1  mrg 						     (__C << 2) | __B,
   2625  1.1  mrg 						     (__v32hf) __W, __U,
   2626  1.1  mrg 						     __R);
   2627  1.1  mrg }
   2628  1.1  mrg 
   2629  1.1  mrg extern __inline __m512h
   2630  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2631  1.1  mrg _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
   2632  1.1  mrg 			       _MM_MANTISSA_NORM_ENUM __B,
   2633  1.1  mrg 			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
   2634  1.1  mrg {
   2635  1.1  mrg   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
   2636  1.1  mrg 						     (__C << 2) | __B,
   2637  1.1  mrg 						     (__v32hf)
   2638  1.1  mrg 						     _mm512_setzero_ph (),
   2639  1.1  mrg 						     __U, __R);
   2640  1.1  mrg }
   2641  1.1  mrg 
   2642  1.1  mrg #else
   2643  1.1  mrg #define _mm512_getmant_ph(X, B, C)					\
   2644  1.1  mrg   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
   2645  1.1  mrg 					      (int)(((C)<<2) | (B)),	\
   2646  1.1  mrg 					      (__v32hf)(__m512h)	\
   2647  1.1  mrg 					      _mm512_setzero_ph(),	\
   2648  1.1  mrg 					      (__mmask32)-1,		\
   2649  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION))
   2650  1.1  mrg 
   2651  1.1  mrg #define _mm512_mask_getmant_ph(W, U, X, B, C)				\
   2652  1.1  mrg   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
   2653  1.1  mrg 					      (int)(((C)<<2) | (B)),	\
   2654  1.1  mrg 					      (__v32hf)(__m512h)(W),	\
   2655  1.1  mrg 					      (__mmask32)(U),		\
   2656  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION))
   2657  1.1  mrg 
   2658  1.1  mrg 
   2659  1.1  mrg #define _mm512_maskz_getmant_ph(U, X, B, C)				\
   2660  1.1  mrg   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
   2661  1.1  mrg 					      (int)(((C)<<2) | (B)),	\
   2662  1.1  mrg 					      (__v32hf)(__m512h)	\
   2663  1.1  mrg 					      _mm512_setzero_ph(),	\
   2664  1.1  mrg 					      (__mmask32)(U),		\
   2665  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION))
   2666  1.1  mrg 
   2667  1.1  mrg #define _mm_getmant_sh(X, Y, C, D)					\
   2668  1.1  mrg   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
   2669  1.1  mrg 						 (__v8hf)(__m128h)(Y),	\
   2670  1.1  mrg 						 (int)(((D)<<2) | (C)),	\
   2671  1.1  mrg 						 (__v8hf)(__m128h)	\
   2672  1.1  mrg 						 _mm_setzero_ph (),	\
   2673  1.1  mrg 						 (__mmask8)-1,		\
   2674  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION))
   2675  1.1  mrg 
   2676  1.1  mrg #define _mm_mask_getmant_sh(W, U, X, Y, C, D)				\
   2677  1.1  mrg   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
   2678  1.1  mrg 						 (__v8hf)(__m128h)(Y),	\
   2679  1.1  mrg 						 (int)(((D)<<2) | (C)),	\
   2680  1.1  mrg 						 (__v8hf)(__m128h)(W),	\
   2681  1.1  mrg 						 (__mmask8)(U),		\
   2682  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION))
   2683  1.1  mrg 
   2684  1.1  mrg #define _mm_maskz_getmant_sh(U, X, Y, C, D)				\
   2685  1.1  mrg   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
   2686  1.1  mrg 						 (__v8hf)(__m128h)(Y),	\
   2687  1.1  mrg 						 (int)(((D)<<2) | (C)),	\
   2688  1.1  mrg 						 (__v8hf)(__m128h)	\
   2689  1.1  mrg 						 _mm_setzero_ph(),	\
   2690  1.1  mrg 						 (__mmask8)(U),		\
   2691  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION))
   2692  1.1  mrg 
   2693  1.1  mrg #define _mm512_getmant_round_ph(X, B, C, R)				\
   2694  1.1  mrg   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
   2695  1.1  mrg 					      (int)(((C)<<2) | (B)),	\
   2696  1.1  mrg 					      (__v32hf)(__m512h)	\
   2697  1.1  mrg 					      _mm512_setzero_ph(),	\
   2698  1.1  mrg 					      (__mmask32)-1,		\
   2699  1.1  mrg 					      (R)))
   2700  1.1  mrg 
   2701  1.1  mrg #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R)			\
   2702  1.1  mrg   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
   2703  1.1  mrg 					      (int)(((C)<<2) | (B)),	\
   2704  1.1  mrg 					      (__v32hf)(__m512h)(W),	\
   2705  1.1  mrg 					      (__mmask32)(U),		\
   2706  1.1  mrg 					      (R)))
   2707  1.1  mrg 
   2708  1.1  mrg 
   2709  1.1  mrg #define _mm512_maskz_getmant_round_ph(U, X, B, C, R)			\
   2710  1.1  mrg   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
   2711  1.1  mrg 					      (int)(((C)<<2) | (B)),	\
   2712  1.1  mrg 					      (__v32hf)(__m512h)	\
   2713  1.1  mrg 					      _mm512_setzero_ph(),	\
   2714  1.1  mrg 					      (__mmask32)(U),		\
   2715  1.1  mrg 					      (R)))
   2716  1.1  mrg 
   2717  1.1  mrg #define _mm_getmant_round_sh(X, Y, C, D, R)				\
   2718  1.1  mrg   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
   2719  1.1  mrg 						 (__v8hf)(__m128h)(Y),	\
   2720  1.1  mrg 						 (int)(((D)<<2) | (C)),	\
   2721  1.1  mrg 						 (__v8hf)(__m128h)	\
   2722  1.1  mrg 						 _mm_setzero_ph (),	\
   2723  1.1  mrg 						 (__mmask8)-1,		\
   2724  1.1  mrg 						 (R)))
   2725  1.1  mrg 
   2726  1.1  mrg #define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R)			\
   2727  1.1  mrg   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
   2728  1.1  mrg 						 (__v8hf)(__m128h)(Y),	\
   2729  1.1  mrg 						 (int)(((D)<<2) | (C)),	\
   2730  1.1  mrg 						 (__v8hf)(__m128h)(W),	\
   2731  1.1  mrg 						 (__mmask8)(U),		\
   2732  1.1  mrg 						 (R)))
   2733  1.1  mrg 
   2734  1.1  mrg #define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R)			\
   2735  1.1  mrg   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
   2736  1.1  mrg 						 (__v8hf)(__m128h)(Y),	\
   2737  1.1  mrg 						 (int)(((D)<<2) | (C)),	\
   2738  1.1  mrg 						 (__v8hf)(__m128h)	\
   2739  1.1  mrg 						 _mm_setzero_ph(),	\
   2740  1.1  mrg 						 (__mmask8)(U),		\
   2741  1.1  mrg 						 (R)))
   2742  1.1  mrg 
   2743  1.1  mrg #endif /* __OPTIMIZE__ */
   2744  1.1  mrg 
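/* Usage sketch (illustrative only, not part of this header): vgetmantph
   normalizes each lane's mantissa into a chosen interval.  The enum
   arguments are the standard _MM_MANTISSA_NORM_ENUM/_MM_MANTISSA_SIGN_ENUM
   values shared with vgetmantps/pd; _MM_MANT_NORM_1_2 together with
   _MM_MANT_SIGN_zero maps every nonzero finite lane into [1, 2) with a
   positive sign.  `mantissas_of' and `v' are hypothetical names.

     static __m512h
     mantissas_of (__m512h v)
     {
       return _mm512_getmant_ph (v, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero);
     }
*/
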
   2745  1.1  mrg /* Intrinsics vmovw.  */
   2746  1.1  mrg extern __inline __m128i
   2747  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2748  1.1  mrg _mm_cvtsi16_si128 (short __A)
   2749  1.1  mrg {
   2750  1.1  mrg   return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
   2751  1.1  mrg }
   2752  1.1  mrg 
   2753  1.1  mrg extern __inline short
   2754  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2755  1.1  mrg _mm_cvtsi128_si16 (__m128i __A)
   2756  1.1  mrg {
   2757  1.1  mrg   return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
   2758  1.1  mrg }
   2759  1.1  mrg 
   2760  1.1  mrg /* Intrinsics vmovsh.  */
   2761  1.1  mrg extern __inline __m128h
   2762  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2763  1.1  mrg _mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
   2764  1.1  mrg {
   2765  1.1  mrg   return __builtin_ia32_loadsh_mask (__C, __A, __B);
   2766  1.1  mrg }
   2767  1.1  mrg 
   2768  1.1  mrg extern __inline __m128h
   2769  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2770  1.1  mrg _mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
   2771  1.1  mrg {
   2772  1.1  mrg   return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
   2773  1.1  mrg }
   2774  1.1  mrg 
   2775  1.1  mrg extern __inline void
   2776  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2777  1.1  mrg _mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
   2778  1.1  mrg {
   2779  1.1  mrg   __builtin_ia32_storesh_mask (__A,  __C, __B);
   2780  1.1  mrg }
   2781  1.1  mrg 
   2782  1.1  mrg extern __inline __m128h
   2783  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2784  1.1  mrg _mm_move_sh (__m128h __A, __m128h  __B)
   2785  1.1  mrg {
   2786  1.1  mrg   __A[0] = __B[0];
   2787  1.1  mrg   return __A;
   2788  1.1  mrg }
   2789  1.1  mrg 
   2790  1.1  mrg extern __inline __m128h
   2791  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2792  1.1  mrg _mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h  __C, __m128h __D)
   2793  1.1  mrg {
   2794  1.1  mrg   return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
   2795  1.1  mrg }
   2796  1.1  mrg 
   2797  1.1  mrg extern __inline __m128h
   2798  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2799  1.1  mrg _mm_maskz_move_sh (__mmask8 __A, __m128h  __B, __m128h __C)
   2800  1.1  mrg {
   2801  1.1  mrg   return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
   2802  1.1  mrg }
   2803  1.1  mrg 
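/* Usage sketch (illustrative only, not part of this header):
   _mm_cvtsi16_si128/_mm_cvtsi128_si16 move a 16-bit value into and out of
   the low lane of an integer vector, while _mm_move_sh splices the low
   _Float16 of one vector into another: the result below carries b[0] in
   lane 0 and a[1..7] in the upper lanes.  `splice_low', `a' and `b' are
   hypothetical names.

     static __m128h
     splice_low (__m128h a, __m128h b)
     {
       return _mm_move_sh (a, b);
     }
*/
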
   2804  1.1  mrg /* Intrinsics vcvtph2dq.  */
   2805  1.1  mrg extern __inline __m512i
   2806  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2807  1.1  mrg _mm512_cvtph_epi32 (__m256h __A)
   2808  1.1  mrg {
   2809  1.1  mrg   return (__m512i)
   2810  1.1  mrg     __builtin_ia32_vcvtph2dq512_mask_round (__A,
   2811  1.1  mrg 					    (__v16si)
   2812  1.1  mrg 					    _mm512_setzero_si512 (),
   2813  1.1  mrg 					    (__mmask16) -1,
   2814  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION);
   2815  1.1  mrg }
   2816  1.1  mrg 
   2817  1.1  mrg extern __inline __m512i
   2818  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2819  1.1  mrg _mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
   2820  1.1  mrg {
   2821  1.1  mrg   return (__m512i)
   2822  1.1  mrg     __builtin_ia32_vcvtph2dq512_mask_round (__C,
   2823  1.1  mrg 					    (__v16si) __A,
   2824  1.1  mrg 					    __B,
   2825  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION);
   2826  1.1  mrg }
   2827  1.1  mrg 
   2828  1.1  mrg extern __inline __m512i
   2829  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2830  1.1  mrg _mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
   2831  1.1  mrg {
   2832  1.1  mrg   return (__m512i)
   2833  1.1  mrg     __builtin_ia32_vcvtph2dq512_mask_round (__B,
   2834  1.1  mrg 					    (__v16si)
   2835  1.1  mrg 					    _mm512_setzero_si512 (),
   2836  1.1  mrg 					    __A,
   2837  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION);
   2838  1.1  mrg }
   2839  1.1  mrg 
   2840  1.1  mrg #ifdef __OPTIMIZE__
   2841  1.1  mrg extern __inline __m512i
   2842  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2843  1.1  mrg _mm512_cvt_roundph_epi32 (__m256h __A, int __B)
   2844  1.1  mrg {
   2845  1.1  mrg   return (__m512i)
   2846  1.1  mrg     __builtin_ia32_vcvtph2dq512_mask_round (__A,
   2847  1.1  mrg 					    (__v16si)
   2848  1.1  mrg 					    _mm512_setzero_si512 (),
   2849  1.1  mrg 					    (__mmask16) -1,
   2850  1.1  mrg 					    __B);
   2851  1.1  mrg }
   2852  1.1  mrg 
   2853  1.1  mrg extern __inline __m512i
   2854  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2855  1.1  mrg _mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
   2856  1.1  mrg {
   2857  1.1  mrg   return (__m512i)
   2858  1.1  mrg     __builtin_ia32_vcvtph2dq512_mask_round (__C,
   2859  1.1  mrg 					    (__v16si) __A,
   2860  1.1  mrg 					    __B,
   2861  1.1  mrg 					    __D);
   2862  1.1  mrg }
   2863  1.1  mrg 
   2864  1.1  mrg extern __inline __m512i
   2865  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2866  1.1  mrg _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
   2867  1.1  mrg {
   2868  1.1  mrg   return (__m512i)
   2869  1.1  mrg     __builtin_ia32_vcvtph2dq512_mask_round (__B,
   2870  1.1  mrg 					    (__v16si)
   2871  1.1  mrg 					    _mm512_setzero_si512 (),
   2872  1.1  mrg 					    __A,
   2873  1.1  mrg 					    __C);
   2874  1.1  mrg }
   2875  1.1  mrg 
   2876  1.1  mrg #else
   2877  1.1  mrg #define _mm512_cvt_roundph_epi32(A, B)					\
   2878  1.1  mrg   ((__m512i)								\
   2879  1.1  mrg    __builtin_ia32_vcvtph2dq512_mask_round ((A),				\
   2880  1.1  mrg 					   (__v16si)			\
   2881  1.1  mrg 					   _mm512_setzero_si512 (),	\
   2882  1.1  mrg 					   (__mmask16)-1,		\
   2883  1.1  mrg 					   (B)))
   2884  1.1  mrg 
   2885  1.1  mrg #define _mm512_mask_cvt_roundph_epi32(A, B, C, D)			\
   2886  1.1  mrg   ((__m512i)								\
   2887  1.1  mrg    __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
   2888  1.1  mrg 
   2889  1.1  mrg #define _mm512_maskz_cvt_roundph_epi32(A, B, C)				\
   2890  1.1  mrg   ((__m512i)								\
   2891  1.1  mrg    __builtin_ia32_vcvtph2dq512_mask_round ((B),				\
   2892  1.1  mrg 					   (__v16si)			\
   2893  1.1  mrg 					   _mm512_setzero_si512 (),	\
   2894  1.1  mrg 					   (A),				\
   2895  1.1  mrg 					   (C)))
   2896  1.1  mrg 
   2897  1.1  mrg #endif /* __OPTIMIZE__ */
   2898  1.1  mrg 
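/* Usage sketch (illustrative only, not part of this header): vcvtph2dq
   widens 16 _Float16 values to 32-bit signed integers.  The _round
   variants take an explicit rounding mode; the combination
   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC ("round to nearest,
   suppress exceptions") is the usual choice.  `halves_to_int32' and `v'
   are hypothetical names.

     static __m512i
     halves_to_int32 (__m256h v)
     {
       return _mm512_cvt_roundph_epi32 (v, _MM_FROUND_TO_NEAREST_INT
					   | _MM_FROUND_NO_EXC);
     }
*/
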
   2899  1.1  mrg /* Intrinsics vcvtph2udq.  */
   2900  1.1  mrg extern __inline __m512i
   2901  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2902  1.1  mrg _mm512_cvtph_epu32 (__m256h __A)
   2903  1.1  mrg {
   2904  1.1  mrg   return (__m512i)
   2905  1.1  mrg     __builtin_ia32_vcvtph2udq512_mask_round (__A,
   2906  1.1  mrg 					     (__v16si)
   2907  1.1  mrg 					     _mm512_setzero_si512 (),
   2908  1.1  mrg 					     (__mmask16) -1,
   2909  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   2910  1.1  mrg }
   2911  1.1  mrg 
   2912  1.1  mrg extern __inline __m512i
   2913  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2914  1.1  mrg _mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
   2915  1.1  mrg {
   2916  1.1  mrg   return (__m512i)
   2917  1.1  mrg     __builtin_ia32_vcvtph2udq512_mask_round (__C,
   2918  1.1  mrg 					     (__v16si) __A,
   2919  1.1  mrg 					     __B,
   2920  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   2921  1.1  mrg }
   2922  1.1  mrg 
   2923  1.1  mrg extern __inline __m512i
   2924  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2925  1.1  mrg _mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
   2926  1.1  mrg {
   2927  1.1  mrg   return (__m512i)
   2928  1.1  mrg     __builtin_ia32_vcvtph2udq512_mask_round (__B,
   2929  1.1  mrg 					     (__v16si)
   2930  1.1  mrg 					     _mm512_setzero_si512 (),
   2931  1.1  mrg 					     __A,
   2932  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   2933  1.1  mrg }
   2934  1.1  mrg 
   2935  1.1  mrg #ifdef __OPTIMIZE__
   2936  1.1  mrg extern __inline __m512i
   2937  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2938  1.1  mrg _mm512_cvt_roundph_epu32 (__m256h __A, int __B)
   2939  1.1  mrg {
   2940  1.1  mrg   return (__m512i)
   2941  1.1  mrg     __builtin_ia32_vcvtph2udq512_mask_round (__A,
   2942  1.1  mrg 					     (__v16si)
   2943  1.1  mrg 					     _mm512_setzero_si512 (),
   2944  1.1  mrg 					     (__mmask16) -1,
   2945  1.1  mrg 					     __B);
   2946  1.1  mrg }
   2947  1.1  mrg 
   2948  1.1  mrg extern __inline __m512i
   2949  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2950  1.1  mrg _mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
   2951  1.1  mrg {
   2952  1.1  mrg   return (__m512i)
   2953  1.1  mrg     __builtin_ia32_vcvtph2udq512_mask_round (__C,
   2954  1.1  mrg 					     (__v16si) __A,
   2955  1.1  mrg 					     __B,
   2956  1.1  mrg 					     __D);
   2957  1.1  mrg }
   2958  1.1  mrg 
   2959  1.1  mrg extern __inline __m512i
   2960  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2961  1.1  mrg _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
   2962  1.1  mrg {
   2963  1.1  mrg   return (__m512i)
   2964  1.1  mrg     __builtin_ia32_vcvtph2udq512_mask_round (__B,
   2965  1.1  mrg 					     (__v16si)
   2966  1.1  mrg 					     _mm512_setzero_si512 (),
   2967  1.1  mrg 					     __A,
   2968  1.1  mrg 					     __C);
   2969  1.1  mrg }
   2970  1.1  mrg 
   2971  1.1  mrg #else
   2972  1.1  mrg #define _mm512_cvt_roundph_epu32(A, B)					\
   2973  1.1  mrg   ((__m512i)								\
   2974  1.1  mrg    __builtin_ia32_vcvtph2udq512_mask_round ((A),			\
   2975  1.1  mrg 					    (__v16si)			\
   2976  1.1  mrg 					    _mm512_setzero_si512 (),	\
   2977  1.1  mrg 					    (__mmask16)-1,		\
   2978  1.1  mrg 					    (B)))
   2979  1.1  mrg 
   2980  1.1  mrg #define _mm512_mask_cvt_roundph_epu32(A, B, C, D)			\
   2981  1.1  mrg   ((__m512i)								\
   2982  1.1  mrg    __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
   2983  1.1  mrg 
   2984  1.1  mrg #define _mm512_maskz_cvt_roundph_epu32(A, B, C)				\
   2985  1.1  mrg   ((__m512i)								\
   2986  1.1  mrg    __builtin_ia32_vcvtph2udq512_mask_round ((B),			\
   2987  1.1  mrg 					    (__v16si)			\
   2988  1.1  mrg 					    _mm512_setzero_si512 (),	\
   2989  1.1  mrg 					    (A),			\
   2990  1.1  mrg 					    (C)))
   2991  1.1  mrg 
   2992  1.1  mrg #endif /* __OPTIMIZE__ */
   2993  1.1  mrg 
   2994  1.1  mrg /* Intrinsics vcvttph2dq.  */
   2995  1.1  mrg extern __inline __m512i
   2996  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   2997  1.1  mrg _mm512_cvttph_epi32 (__m256h __A)
   2998  1.1  mrg {
   2999  1.1  mrg   return (__m512i)
   3000  1.1  mrg     __builtin_ia32_vcvttph2dq512_mask_round (__A,
   3001  1.1  mrg 					     (__v16si)
   3002  1.1  mrg 					     _mm512_setzero_si512 (),
   3003  1.1  mrg 					     (__mmask16) -1,
   3004  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   3005  1.1  mrg }
   3006  1.1  mrg 
   3007  1.1  mrg extern __inline __m512i
   3008  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3009  1.1  mrg _mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
   3010  1.1  mrg {
   3011  1.1  mrg   return (__m512i)
   3012  1.1  mrg     __builtin_ia32_vcvttph2dq512_mask_round (__C,
   3013  1.1  mrg 					     (__v16si) __A,
   3014  1.1  mrg 					     __B,
   3015  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   3016  1.1  mrg }
   3017  1.1  mrg 
   3018  1.1  mrg extern __inline __m512i
   3019  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3020  1.1  mrg _mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
   3021  1.1  mrg {
   3022  1.1  mrg   return (__m512i)
   3023  1.1  mrg     __builtin_ia32_vcvttph2dq512_mask_round (__B,
   3024  1.1  mrg 					     (__v16si)
   3025  1.1  mrg 					     _mm512_setzero_si512 (),
   3026  1.1  mrg 					     __A,
   3027  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   3028  1.1  mrg }
   3029  1.1  mrg 
   3030  1.1  mrg #ifdef __OPTIMIZE__
   3031  1.1  mrg extern __inline __m512i
   3032  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3033  1.1  mrg _mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
   3034  1.1  mrg {
   3035  1.1  mrg   return (__m512i)
   3036  1.1  mrg     __builtin_ia32_vcvttph2dq512_mask_round (__A,
   3037  1.1  mrg 					     (__v16si)
   3038  1.1  mrg 					     _mm512_setzero_si512 (),
   3039  1.1  mrg 					     (__mmask16) -1,
   3040  1.1  mrg 					     __B);
   3041  1.1  mrg }
   3042  1.1  mrg 
   3043  1.1  mrg extern __inline __m512i
   3044  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3045  1.1  mrg _mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
   3046  1.1  mrg 				__m256h __C, int __D)
   3047  1.1  mrg {
   3048  1.1  mrg   return (__m512i)
   3049  1.1  mrg     __builtin_ia32_vcvttph2dq512_mask_round (__C,
   3050  1.1  mrg 					     (__v16si) __A,
   3051  1.1  mrg 					     __B,
   3052  1.1  mrg 					     __D);
   3053  1.1  mrg }
   3054  1.1  mrg 
   3055  1.1  mrg extern __inline __m512i
   3056  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3057  1.1  mrg _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
   3058  1.1  mrg {
   3059  1.1  mrg   return (__m512i)
   3060  1.1  mrg     __builtin_ia32_vcvttph2dq512_mask_round (__B,
   3061  1.1  mrg 					     (__v16si)
   3062  1.1  mrg 					     _mm512_setzero_si512 (),
   3063  1.1  mrg 					     __A,
   3064  1.1  mrg 					     __C);
   3065  1.1  mrg }
   3066  1.1  mrg 
   3067  1.1  mrg #else
   3068  1.1  mrg #define _mm512_cvtt_roundph_epi32(A, B)					\
   3069  1.1  mrg   ((__m512i)								\
   3070  1.1  mrg    __builtin_ia32_vcvttph2dq512_mask_round ((A),			\
   3071  1.1  mrg 					    (__v16si)			\
   3072  1.1  mrg 					    (_mm512_setzero_si512 ()),	\
   3073  1.1  mrg 					    (__mmask16)(-1), (B)))
   3074  1.1  mrg 
   3075  1.1  mrg #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D)		\
   3076  1.1  mrg   ((__m512i)							\
   3077  1.1  mrg    __builtin_ia32_vcvttph2dq512_mask_round ((C),		\
   3078  1.1  mrg 					    (__v16si)(A),	\
   3079  1.1  mrg 					    (B),		\
   3080  1.1  mrg 					    (D)))
   3081  1.1  mrg 
   3082  1.1  mrg #define _mm512_maskz_cvtt_roundph_epi32(A, B, C)			\
   3083  1.1  mrg   ((__m512i)								\
   3084  1.1  mrg    __builtin_ia32_vcvttph2dq512_mask_round ((B),			\
   3085  1.1  mrg 					    (__v16si)			\
   3086  1.1  mrg 					    _mm512_setzero_si512 (),	\
   3087  1.1  mrg 					    (A),			\
   3088  1.1  mrg 					    (C)))
   3089  1.1  mrg 
   3090  1.1  mrg #endif /* __OPTIMIZE__ */
   3091  1.1  mrg 
   3092  1.1  mrg /* Intrinsics vcvttph2udq.  */
   3093  1.1  mrg extern __inline __m512i
   3094  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3095  1.1  mrg _mm512_cvttph_epu32 (__m256h __A)
   3096  1.1  mrg {
   3097  1.1  mrg   return (__m512i)
   3098  1.1  mrg     __builtin_ia32_vcvttph2udq512_mask_round (__A,
   3099  1.1  mrg 					      (__v16si)
   3100  1.1  mrg 					      _mm512_setzero_si512 (),
   3101  1.1  mrg 					      (__mmask16) -1,
   3102  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   3103  1.1  mrg }
   3104  1.1  mrg 
   3105  1.1  mrg extern __inline __m512i
   3106  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3107  1.1  mrg _mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
   3108  1.1  mrg {
   3109  1.1  mrg   return (__m512i)
   3110  1.1  mrg     __builtin_ia32_vcvttph2udq512_mask_round (__C,
   3111  1.1  mrg 					      (__v16si) __A,
   3112  1.1  mrg 					      __B,
   3113  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   3114  1.1  mrg }
   3115  1.1  mrg 
   3116  1.1  mrg extern __inline __m512i
   3117  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3118  1.1  mrg _mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
   3119  1.1  mrg {
   3120  1.1  mrg   return (__m512i)
   3121  1.1  mrg     __builtin_ia32_vcvttph2udq512_mask_round (__B,
   3122  1.1  mrg 					      (__v16si)
   3123  1.1  mrg 					      _mm512_setzero_si512 (),
   3124  1.1  mrg 					      __A,
   3125  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   3126  1.1  mrg }
   3127  1.1  mrg 
   3128  1.1  mrg #ifdef __OPTIMIZE__
   3129  1.1  mrg extern __inline __m512i
   3130  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3131  1.1  mrg _mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
   3132  1.1  mrg {
   3133  1.1  mrg   return (__m512i)
   3134  1.1  mrg     __builtin_ia32_vcvttph2udq512_mask_round (__A,
   3135  1.1  mrg 					      (__v16si)
   3136  1.1  mrg 					      _mm512_setzero_si512 (),
   3137  1.1  mrg 					      (__mmask16) -1,
   3138  1.1  mrg 					      __B);
   3139  1.1  mrg }
   3140  1.1  mrg 
   3141  1.1  mrg extern __inline __m512i
   3142  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3143  1.1  mrg _mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
   3144  1.1  mrg 				__m256h __C, int __D)
   3145  1.1  mrg {
   3146  1.1  mrg   return (__m512i)
   3147  1.1  mrg     __builtin_ia32_vcvttph2udq512_mask_round (__C,
   3148  1.1  mrg 					      (__v16si) __A,
   3149  1.1  mrg 					      __B,
   3150  1.1  mrg 					      __D);
   3151  1.1  mrg }
   3152  1.1  mrg 
   3153  1.1  mrg extern __inline __m512i
   3154  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3155  1.1  mrg _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
   3156  1.1  mrg {
   3157  1.1  mrg   return (__m512i)
   3158  1.1  mrg     __builtin_ia32_vcvttph2udq512_mask_round (__B,
   3159  1.1  mrg 					      (__v16si)
   3160  1.1  mrg 					      _mm512_setzero_si512 (),
   3161  1.1  mrg 					      __A,
   3162  1.1  mrg 					      __C);
   3163  1.1  mrg }
   3164  1.1  mrg 
   3165  1.1  mrg #else
   3166  1.1  mrg #define _mm512_cvtt_roundph_epu32(A, B)					\
   3167  1.1  mrg   ((__m512i)								\
   3168  1.1  mrg    __builtin_ia32_vcvttph2udq512_mask_round ((A),			\
   3169  1.1  mrg 					     (__v16si)			\
   3170  1.1  mrg 					     _mm512_setzero_si512 (),	\
   3171  1.1  mrg 					     (__mmask16)-1,		\
   3172  1.1  mrg 					     (B)))
   3173  1.1  mrg 
   3174  1.1  mrg #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D)		\
   3175  1.1  mrg   ((__m512i)							\
   3176  1.1  mrg    __builtin_ia32_vcvttph2udq512_mask_round ((C),		\
   3177  1.1  mrg 					     (__v16si)(A),	\
   3178  1.1  mrg 					     (B),		\
   3179  1.1  mrg 					     (D)))
   3180  1.1  mrg 
   3181  1.1  mrg #define _mm512_maskz_cvtt_roundph_epu32(A, B, C)			\
   3182  1.1  mrg   ((__m512i)								\
   3183  1.1  mrg    __builtin_ia32_vcvttph2udq512_mask_round ((B),			\
   3184  1.1  mrg 					     (__v16si)			\
   3185  1.1  mrg 					     _mm512_setzero_si512 (),	\
   3186  1.1  mrg 					     (A),			\
   3187  1.1  mrg 					     (C)))
   3188  1.1  mrg 
   3189  1.1  mrg #endif /* __OPTIMIZE__ */
   3190  1.1  mrg 
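/* Usage sketch (illustrative only, not part of this header): the
   vcvttph2dq/vcvttph2udq forms truncate toward zero instead of honoring
   the current rounding mode, so a lane holding 1.9 converts to 1 and one
   holding -1.9 to -1.  `truncate_to_int32' and `v' are hypothetical names.

     static __m512i
     truncate_to_int32 (__m256h v)
     {
       return _mm512_cvttph_epi32 (v);
     }
*/
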
   3191  1.1  mrg /* Intrinsics vcvtdq2ph.  */
   3192  1.1  mrg extern __inline __m256h
   3193  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3194  1.1  mrg _mm512_cvtepi32_ph (__m512i __A)
   3195  1.1  mrg {
   3196  1.1  mrg   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
   3197  1.1  mrg 						 _mm256_setzero_ph (),
   3198  1.1  mrg 						 (__mmask16) -1,
   3199  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3200  1.1  mrg }
   3201  1.1  mrg 
   3202  1.1  mrg extern __inline __m256h
   3203  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3204  1.1  mrg _mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
   3205  1.1  mrg {
   3206  1.1  mrg   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
   3207  1.1  mrg 						 __A,
   3208  1.1  mrg 						 __B,
   3209  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3210  1.1  mrg }
   3211  1.1  mrg 
   3212  1.1  mrg extern __inline __m256h
   3213  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3214  1.1  mrg _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
   3215  1.1  mrg {
   3216  1.1  mrg   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
   3217  1.1  mrg 						 _mm256_setzero_ph (),
   3218  1.1  mrg 						 __A,
   3219  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3220  1.1  mrg }
   3221  1.1  mrg 
   3222  1.1  mrg #ifdef __OPTIMIZE__
   3223  1.1  mrg extern __inline __m256h
   3224  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3225  1.1  mrg _mm512_cvt_roundepi32_ph (__m512i __A, int __B)
   3226  1.1  mrg {
   3227  1.1  mrg   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
   3228  1.1  mrg 						 _mm256_setzero_ph (),
   3229  1.1  mrg 						 (__mmask16) -1,
   3230  1.1  mrg 						 __B);
   3231  1.1  mrg }
   3232  1.1  mrg 
   3233  1.1  mrg extern __inline __m256h
   3234  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3235  1.1  mrg _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
   3236  1.1  mrg {
   3237  1.1  mrg   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
   3238  1.1  mrg 						 __A,
   3239  1.1  mrg 						 __B,
   3240  1.1  mrg 						 __D);
   3241  1.1  mrg }
   3242  1.1  mrg 
   3243  1.1  mrg extern __inline __m256h
   3244  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3245  1.1  mrg _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
   3246  1.1  mrg {
   3247  1.1  mrg   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
   3248  1.1  mrg 						 _mm256_setzero_ph (),
   3249  1.1  mrg 						 __A,
   3250  1.1  mrg 						 __C);
   3251  1.1  mrg }
   3252  1.1  mrg 
   3253  1.1  mrg #else
   3254  1.1  mrg #define _mm512_cvt_roundepi32_ph(A, B)					\
   3255  1.1  mrg   (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A),		\
   3256  1.1  mrg 					   _mm256_setzero_ph (),	\
   3257  1.1  mrg 					   (__mmask16)-1,		\
   3258  1.1  mrg 					   (B)))
   3259  1.1  mrg 
   3260  1.1  mrg #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D)		\
   3261  1.1  mrg   (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C),	\
   3262  1.1  mrg 					   (A),			\
   3263  1.1  mrg 					   (B),			\
   3264  1.1  mrg 					   (D)))
   3265  1.1  mrg 
   3266  1.1  mrg #define _mm512_maskz_cvt_roundepi32_ph(A, B, C)				\
   3267  1.1  mrg   (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B),		\
   3268  1.1  mrg 					   _mm256_setzero_ph (),	\
   3269  1.1  mrg 					   (A),				\
   3270  1.1  mrg 					   (C)))
   3271  1.1  mrg 
   3272  1.1  mrg #endif /* __OPTIMIZE__ */
   3273  1.1  mrg 
   3274  1.1  mrg /* Intrinsics vcvtudq2ph.  */
   3275  1.1  mrg extern __inline __m256h
   3276  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3277  1.1  mrg _mm512_cvtepu32_ph (__m512i __A)
   3278  1.1  mrg {
   3279  1.1  mrg   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
   3280  1.1  mrg 						  _mm256_setzero_ph (),
   3281  1.1  mrg 						  (__mmask16) -1,
   3282  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3283  1.1  mrg }
   3284  1.1  mrg 
   3285  1.1  mrg extern __inline __m256h
   3286  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3287  1.1  mrg _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
   3288  1.1  mrg {
   3289  1.1  mrg   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
   3290  1.1  mrg 						  __A,
   3291  1.1  mrg 						  __B,
   3292  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3293  1.1  mrg }
   3294  1.1  mrg 
   3295  1.1  mrg extern __inline __m256h
   3296  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3297  1.1  mrg _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
   3298  1.1  mrg {
   3299  1.1  mrg   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
   3300  1.1  mrg 						  _mm256_setzero_ph (),
   3301  1.1  mrg 						  __A,
   3302  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3303  1.1  mrg }
   3304  1.1  mrg 
   3305  1.1  mrg #ifdef __OPTIMIZE__
   3306  1.1  mrg extern __inline __m256h
   3307  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3308  1.1  mrg _mm512_cvt_roundepu32_ph (__m512i __A, int __B)
   3309  1.1  mrg {
   3310  1.1  mrg   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
   3311  1.1  mrg 						  _mm256_setzero_ph (),
   3312  1.1  mrg 						  (__mmask16) -1,
   3313  1.1  mrg 						  __B);
   3314  1.1  mrg }
   3315  1.1  mrg 
   3316  1.1  mrg extern __inline __m256h
   3317  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3318  1.1  mrg _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
   3319  1.1  mrg {
   3320  1.1  mrg   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
   3321  1.1  mrg 						  __A,
   3322  1.1  mrg 						  __B,
   3323  1.1  mrg 						  __D);
   3324  1.1  mrg }
   3325  1.1  mrg 
   3326  1.1  mrg extern __inline __m256h
   3327  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3328  1.1  mrg _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
   3329  1.1  mrg {
   3330  1.1  mrg   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
   3331  1.1  mrg 						  _mm256_setzero_ph (),
   3332  1.1  mrg 						  __A,
   3333  1.1  mrg 						  __C);
   3334  1.1  mrg }
   3335  1.1  mrg 
   3336  1.1  mrg #else
   3337  1.1  mrg #define _mm512_cvt_roundepu32_ph(A, B)					\
   3338  1.1  mrg   (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A),		\
   3339  1.1  mrg 					    _mm256_setzero_ph (),	\
   3340  1.1  mrg 					    (__mmask16)-1,		\
   3341  1.1  mrg 					    B))
   3342  1.1  mrg 
   3343  1.1  mrg #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D)	\
    3344  1.1  mrg   (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(C),	\
   3345  1.1  mrg 					    A,		\
   3346  1.1  mrg 					    B,		\
   3347  1.1  mrg 					    D))
   3348  1.1  mrg 
   3349  1.1  mrg #define _mm512_maskz_cvt_roundepu32_ph(A, B, C)				\
    3350  1.1  mrg   (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(B),			\
   3351  1.1  mrg 					    _mm256_setzero_ph (),	\
   3352  1.1  mrg 					    A,				\
   3353  1.1  mrg 					    C))
   3354  1.1  mrg 
   3355  1.1  mrg #endif /* __OPTIMIZE__ */
   3356  1.1  mrg 
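/* Usage sketch (illustrative only, not part of this header):
   vcvtdq2ph/vcvtudq2ph narrow 16 signed or unsigned 32-bit integers to 16
   _Float16 values, so a __m512i input yields a __m256h result.  Note that
   _Float16 has far less range and precision than a 32-bit integer, so
   large inputs are rounded or overflow.  `int32_to_halves' and `v' are
   hypothetical names.

     static __m256h
     int32_to_halves (__m512i v)
     {
       return _mm512_cvtepi32_ph (v);
     }
*/
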
   3357  1.1  mrg /* Intrinsics vcvtph2qq.  */
   3358  1.1  mrg extern __inline __m512i
   3359  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3360  1.1  mrg _mm512_cvtph_epi64 (__m128h __A)
   3361  1.1  mrg {
   3362  1.1  mrg   return __builtin_ia32_vcvtph2qq512_mask_round (__A,
   3363  1.1  mrg 						 _mm512_setzero_si512 (),
   3364  1.1  mrg 						 (__mmask8) -1,
   3365  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3366  1.1  mrg }
   3367  1.1  mrg 
   3368  1.1  mrg extern __inline __m512i
   3369  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3370  1.1  mrg _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
   3371  1.1  mrg {
   3372  1.1  mrg   return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
   3373  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3374  1.1  mrg }
   3375  1.1  mrg 
   3376  1.1  mrg extern __inline __m512i
   3377  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3378  1.1  mrg _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
   3379  1.1  mrg {
   3380  1.1  mrg   return __builtin_ia32_vcvtph2qq512_mask_round (__B,
   3381  1.1  mrg 						 _mm512_setzero_si512 (),
   3382  1.1  mrg 						 __A,
   3383  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3384  1.1  mrg }
   3385  1.1  mrg 
   3386  1.1  mrg #ifdef __OPTIMIZE__
   3387  1.1  mrg extern __inline __m512i
   3388  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3389  1.1  mrg _mm512_cvt_roundph_epi64 (__m128h __A, int __B)
   3390  1.1  mrg {
   3391  1.1  mrg   return __builtin_ia32_vcvtph2qq512_mask_round (__A,
   3392  1.1  mrg 						 _mm512_setzero_si512 (),
   3393  1.1  mrg 						 (__mmask8) -1,
   3394  1.1  mrg 						 __B);
   3395  1.1  mrg }
   3396  1.1  mrg 
   3397  1.1  mrg extern __inline __m512i
   3398  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3399  1.1  mrg _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
   3400  1.1  mrg {
   3401  1.1  mrg   return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
   3402  1.1  mrg }
   3403  1.1  mrg 
   3404  1.1  mrg extern __inline __m512i
   3405  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3406  1.1  mrg _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
   3407  1.1  mrg {
   3408  1.1  mrg   return __builtin_ia32_vcvtph2qq512_mask_round (__B,
   3409  1.1  mrg 						 _mm512_setzero_si512 (),
   3410  1.1  mrg 						 __A,
   3411  1.1  mrg 						 __C);
   3412  1.1  mrg }
   3413  1.1  mrg 
   3414  1.1  mrg #else
   3415  1.1  mrg #define _mm512_cvt_roundph_epi64(A, B)					\
   3416  1.1  mrg   (__builtin_ia32_vcvtph2qq512_mask_round ((A),				\
   3417  1.1  mrg 					   _mm512_setzero_si512 (),	\
   3418  1.1  mrg 					   (__mmask8)-1,		\
   3419  1.1  mrg 					   (B)))
   3420  1.1  mrg 
   3421  1.1  mrg #define _mm512_mask_cvt_roundph_epi64(A, B, C, D)		\
   3422  1.1  mrg   (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
   3423  1.1  mrg 
   3424  1.1  mrg #define _mm512_maskz_cvt_roundph_epi64(A, B, C)				\
   3425  1.1  mrg   (__builtin_ia32_vcvtph2qq512_mask_round ((B),				\
   3426  1.1  mrg 					   _mm512_setzero_si512 (),	\
   3427  1.1  mrg 					   (A),				\
   3428  1.1  mrg 					   (C)))
   3429  1.1  mrg 
   3430  1.1  mrg #endif /* __OPTIMIZE__ */
   3431  1.1  mrg 
   3432  1.1  mrg /* Intrinsics vcvtph2uqq.  */
   3433  1.1  mrg extern __inline __m512i
   3434  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3435  1.1  mrg _mm512_cvtph_epu64 (__m128h __A)
   3436  1.1  mrg {
   3437  1.1  mrg   return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
   3438  1.1  mrg 						  _mm512_setzero_si512 (),
   3439  1.1  mrg 						  (__mmask8) -1,
   3440  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3441  1.1  mrg }
   3442  1.1  mrg 
   3443  1.1  mrg extern __inline __m512i
   3444  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3445  1.1  mrg _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
   3446  1.1  mrg {
   3447  1.1  mrg   return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
   3448  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3449  1.1  mrg }
   3450  1.1  mrg 
   3451  1.1  mrg extern __inline __m512i
   3452  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3453  1.1  mrg _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
   3454  1.1  mrg {
   3455  1.1  mrg   return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
   3456  1.1  mrg 						  _mm512_setzero_si512 (),
   3457  1.1  mrg 						  __A,
   3458  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3459  1.1  mrg }
   3460  1.1  mrg 
   3461  1.1  mrg #ifdef __OPTIMIZE__
   3462  1.1  mrg 
   3463  1.1  mrg extern __inline __m512i
   3464  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3465  1.1  mrg _mm512_cvt_roundph_epu64 (__m128h __A, int __B)
   3466  1.1  mrg {
   3467  1.1  mrg   return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
   3468  1.1  mrg 						  _mm512_setzero_si512 (),
   3469  1.1  mrg 						  (__mmask8) -1,
   3470  1.1  mrg 						  __B);
   3471  1.1  mrg }
   3472  1.1  mrg 
   3473  1.1  mrg extern __inline __m512i
   3474  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3475  1.1  mrg _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
   3476  1.1  mrg {
   3477  1.1  mrg   return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
   3478  1.1  mrg }
   3479  1.1  mrg 
   3480  1.1  mrg extern __inline __m512i
   3481  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3482  1.1  mrg _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
   3483  1.1  mrg {
   3484  1.1  mrg   return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
   3485  1.1  mrg 						  _mm512_setzero_si512 (),
   3486  1.1  mrg 						  __A,
   3487  1.1  mrg 						  __C);
   3488  1.1  mrg }
   3489  1.1  mrg 
   3490  1.1  mrg #else
   3491  1.1  mrg #define _mm512_cvt_roundph_epu64(A, B)					\
   3492  1.1  mrg   (__builtin_ia32_vcvtph2uqq512_mask_round ((A),			\
   3493  1.1  mrg 					    _mm512_setzero_si512 (),	\
   3494  1.1  mrg 					    (__mmask8)-1,		\
   3495  1.1  mrg 					    (B)))
   3496  1.1  mrg 
   3497  1.1  mrg #define _mm512_mask_cvt_roundph_epu64(A, B, C, D)			\
   3498  1.1  mrg   (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
   3499  1.1  mrg 
   3500  1.1  mrg #define _mm512_maskz_cvt_roundph_epu64(A, B, C)				\
   3501  1.1  mrg   (__builtin_ia32_vcvtph2uqq512_mask_round ((B),			\
   3502  1.1  mrg 					    _mm512_setzero_si512 (),	\
   3503  1.1  mrg 					    (A),			\
   3504  1.1  mrg 					    (C)))
   3505  1.1  mrg 
   3506  1.1  mrg #endif /* __OPTIMIZE__ */
   3507  1.1  mrg 
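/* Usage sketch (illustrative only, not part of this header): the 64-bit
   conversions consume only the eight _Float16 values of a __m128h and
   produce a full __m512i of signed (vcvtph2qq) or unsigned (vcvtph2uqq)
   64-bit integers.  `halves_to_int64' and `v' are hypothetical names.

     static __m512i
     halves_to_int64 (__m128h v)
     {
       return _mm512_cvtph_epi64 (v);
     }
*/
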
   3508  1.1  mrg /* Intrinsics vcvttph2qq.  */
   3509  1.1  mrg extern __inline __m512i
   3510  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3511  1.1  mrg _mm512_cvttph_epi64 (__m128h __A)
   3512  1.1  mrg {
   3513  1.1  mrg   return __builtin_ia32_vcvttph2qq512_mask_round (__A,
   3514  1.1  mrg 						  _mm512_setzero_si512 (),
   3515  1.1  mrg 						  (__mmask8) -1,
   3516  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3517  1.1  mrg }
   3518  1.1  mrg 
   3519  1.1  mrg extern __inline __m512i
   3520  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3521  1.1  mrg _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
   3522  1.1  mrg {
   3523  1.1  mrg   return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
   3524  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3525  1.1  mrg }
   3526  1.1  mrg 
   3527  1.1  mrg extern __inline __m512i
   3528  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3529  1.1  mrg _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
   3530  1.1  mrg {
   3531  1.1  mrg   return __builtin_ia32_vcvttph2qq512_mask_round (__B,
   3532  1.1  mrg 						  _mm512_setzero_si512 (),
   3533  1.1  mrg 						  __A,
   3534  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3535  1.1  mrg }
   3536  1.1  mrg 
   3537  1.1  mrg #ifdef __OPTIMIZE__
   3538  1.1  mrg extern __inline __m512i
   3539  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3540  1.1  mrg _mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
   3541  1.1  mrg {
   3542  1.1  mrg   return __builtin_ia32_vcvttph2qq512_mask_round (__A,
   3543  1.1  mrg 						  _mm512_setzero_si512 (),
   3544  1.1  mrg 						  (__mmask8) -1,
   3545  1.1  mrg 						  __B);
   3546  1.1  mrg }
   3547  1.1  mrg 
   3548  1.1  mrg extern __inline __m512i
   3549  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3550  1.1  mrg _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
   3551  1.1  mrg {
   3552  1.1  mrg   return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
   3553  1.1  mrg }
   3554  1.1  mrg 
   3555  1.1  mrg extern __inline __m512i
   3556  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3557  1.1  mrg _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
   3558  1.1  mrg {
   3559  1.1  mrg   return __builtin_ia32_vcvttph2qq512_mask_round (__B,
   3560  1.1  mrg 						  _mm512_setzero_si512 (),
   3561  1.1  mrg 						  __A,
   3562  1.1  mrg 						  __C);
   3563  1.1  mrg }
   3564  1.1  mrg 
   3565  1.1  mrg #else
   3566  1.1  mrg #define _mm512_cvtt_roundph_epi64(A, B)					\
   3567  1.1  mrg   (__builtin_ia32_vcvttph2qq512_mask_round ((A),			\
   3568  1.1  mrg 					    _mm512_setzero_si512 (),	\
   3569  1.1  mrg 					    (__mmask8)-1,		\
   3570  1.1  mrg 					    (B)))
   3571  1.1  mrg 
   3572  1.1  mrg #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D)			\
   3573  1.1  mrg   __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D))
   3574  1.1  mrg 
   3575  1.1  mrg #define _mm512_maskz_cvtt_roundph_epi64(A, B, C)			\
   3576  1.1  mrg   (__builtin_ia32_vcvttph2qq512_mask_round ((B),			\
   3577  1.1  mrg 					    _mm512_setzero_si512 (),	\
   3578  1.1  mrg 					    (A),			\
   3579  1.1  mrg 					    (C)))
   3580  1.1  mrg 
   3581  1.1  mrg #endif /* __OPTIMIZE__ */
   3582  1.1  mrg 
   3583  1.1  mrg /* Intrinsics vcvttph2uqq.  */
   3584  1.1  mrg extern __inline __m512i
   3585  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3586  1.1  mrg _mm512_cvttph_epu64 (__m128h __A)
   3587  1.1  mrg {
   3588  1.1  mrg   return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
   3589  1.1  mrg 						   _mm512_setzero_si512 (),
   3590  1.1  mrg 						   (__mmask8) -1,
   3591  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   3592  1.1  mrg }
   3593  1.1  mrg 
   3594  1.1  mrg extern __inline __m512i
   3595  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3596  1.1  mrg _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
   3597  1.1  mrg {
   3598  1.1  mrg   return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
   3599  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   3600  1.1  mrg }
   3601  1.1  mrg 
   3602  1.1  mrg extern __inline __m512i
   3603  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3604  1.1  mrg _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
   3605  1.1  mrg {
   3606  1.1  mrg   return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
   3607  1.1  mrg 						   _mm512_setzero_si512 (),
   3608  1.1  mrg 						   __A,
   3609  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   3610  1.1  mrg }
   3611  1.1  mrg 
   3612  1.1  mrg #ifdef __OPTIMIZE__
   3613  1.1  mrg extern __inline __m512i
   3614  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3615  1.1  mrg _mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
   3616  1.1  mrg {
   3617  1.1  mrg   return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
   3618  1.1  mrg 						   _mm512_setzero_si512 (),
   3619  1.1  mrg 						   (__mmask8) -1,
   3620  1.1  mrg 						   __B);
   3621  1.1  mrg }
   3622  1.1  mrg 
   3623  1.1  mrg extern __inline __m512i
   3624  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3625  1.1  mrg _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
   3626  1.1  mrg {
   3627  1.1  mrg   return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
   3628  1.1  mrg }
   3629  1.1  mrg 
   3630  1.1  mrg extern __inline __m512i
   3631  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3632  1.1  mrg _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
   3633  1.1  mrg {
   3634  1.1  mrg   return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
   3635  1.1  mrg 						   _mm512_setzero_si512 (),
   3636  1.1  mrg 						   __A,
   3637  1.1  mrg 						   __C);
   3638  1.1  mrg }
   3639  1.1  mrg 
   3640  1.1  mrg #else
   3641  1.1  mrg #define _mm512_cvtt_roundph_epu64(A, B)					\
   3642  1.1  mrg   (__builtin_ia32_vcvttph2uqq512_mask_round ((A),			\
   3643  1.1  mrg 					     _mm512_setzero_si512 (),	\
   3644  1.1  mrg 					     (__mmask8)-1,		\
   3645  1.1  mrg 					     (B)))
   3646  1.1  mrg 
   3647  1.1  mrg #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D)			\
   3648  1.1  mrg   __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D))
   3649  1.1  mrg 
   3650  1.1  mrg #define _mm512_maskz_cvtt_roundph_epu64(A, B, C)			\
   3651  1.1  mrg   (__builtin_ia32_vcvttph2uqq512_mask_round ((B),			\
   3652  1.1  mrg 					     _mm512_setzero_si512 (),	\
   3653  1.1  mrg 					     (A),			\
   3654  1.1  mrg 					     (C)))
   3655  1.1  mrg 
   3656  1.1  mrg #endif /* __OPTIMIZE__ */
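
/* Editorial usage sketch (illustrative only, not part of the original
   header): truncating conversion of the eight _Float16 values in an
   __m128h to packed 64-bit integers, signed and unsigned.

     __m128h h  = _mm_set1_ph ((_Float16) 7.9f);
     __m512i q  = _mm512_cvttph_epi64 (h);             // every lane == 7
     __m512i uq = _mm512_maskz_cvttph_epu64 (0x0f, h); // lanes 4..7 zeroed

   The _mask_ forms take the fallback vector first, then the mask, then
   the __m128h source; the _cvtt_round_ forms add a final argument that
   must be a compile-time _MM_FROUND_* constant.  */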
   3657  1.1  mrg 
   3658  1.1  mrg /* Intrinsics vcvtqq2ph.  */
   3659  1.1  mrg extern __inline __m128h
   3660  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3661  1.1  mrg _mm512_cvtepi64_ph (__m512i __A)
   3662  1.1  mrg {
   3663  1.1  mrg   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
   3664  1.1  mrg 						 _mm_setzero_ph (),
   3665  1.1  mrg 						 (__mmask8) -1,
   3666  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3667  1.1  mrg }
   3668  1.1  mrg 
   3669  1.1  mrg extern __inline __m128h
   3670  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3671  1.1  mrg _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
   3672  1.1  mrg {
   3673  1.1  mrg   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
   3674  1.1  mrg 						 __A,
   3675  1.1  mrg 						 __B,
   3676  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3677  1.1  mrg }
   3678  1.1  mrg 
   3679  1.1  mrg extern __inline __m128h
   3680  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3681  1.1  mrg _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
   3682  1.1  mrg {
   3683  1.1  mrg   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
   3684  1.1  mrg 						 _mm_setzero_ph (),
   3685  1.1  mrg 						 __A,
   3686  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   3687  1.1  mrg }
   3688  1.1  mrg 
   3689  1.1  mrg #ifdef __OPTIMIZE__
   3690  1.1  mrg extern __inline __m128h
   3691  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3692  1.1  mrg _mm512_cvt_roundepi64_ph (__m512i __A, int __B)
   3693  1.1  mrg {
   3694  1.1  mrg   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
   3695  1.1  mrg 						 _mm_setzero_ph (),
   3696  1.1  mrg 						 (__mmask8) -1,
   3697  1.1  mrg 						 __B);
   3698  1.1  mrg }
   3699  1.1  mrg 
   3700  1.1  mrg extern __inline __m128h
   3701  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3702  1.1  mrg _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
   3703  1.1  mrg {
   3704  1.1  mrg   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
   3705  1.1  mrg 						 __A,
   3706  1.1  mrg 						 __B,
   3707  1.1  mrg 						 __D);
   3708  1.1  mrg }
   3709  1.1  mrg 
   3710  1.1  mrg extern __inline __m128h
   3711  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3712  1.1  mrg _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
   3713  1.1  mrg {
   3714  1.1  mrg   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
   3715  1.1  mrg 						 _mm_setzero_ph (),
   3716  1.1  mrg 						 __A,
   3717  1.1  mrg 						 __C);
   3718  1.1  mrg }
   3719  1.1  mrg 
   3720  1.1  mrg #else
   3721  1.1  mrg #define _mm512_cvt_roundepi64_ph(A, B)				\
   3722  1.1  mrg   (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A),		\
   3723  1.1  mrg 					   _mm_setzero_ph (),	\
   3724  1.1  mrg 					   (__mmask8)-1,	\
   3725  1.1  mrg 					   (B)))
   3726  1.1  mrg 
   3727  1.1  mrg #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D)			\
   3728  1.1  mrg   (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
   3729  1.1  mrg 
   3730  1.1  mrg #define _mm512_maskz_cvt_roundepi64_ph(A, B, C)			\
   3731  1.1  mrg   (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B),		\
   3732  1.1  mrg 					   _mm_setzero_ph (),	\
   3733  1.1  mrg 					   (A),			\
   3734  1.1  mrg 					   (C)))
   3735  1.1  mrg 
   3736  1.1  mrg #endif /* __OPTIMIZE__ */
   3737  1.1  mrg 
   3738  1.1  mrg /* Intrinsics vcvtuqq2ph.  */
   3739  1.1  mrg extern __inline __m128h
   3740  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3741  1.1  mrg _mm512_cvtepu64_ph (__m512i __A)
   3742  1.1  mrg {
   3743  1.1  mrg   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
   3744  1.1  mrg 						  _mm_setzero_ph (),
   3745  1.1  mrg 						  (__mmask8) -1,
   3746  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3747  1.1  mrg }
   3748  1.1  mrg 
   3749  1.1  mrg extern __inline __m128h
   3750  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3751  1.1  mrg _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
   3752  1.1  mrg {
   3753  1.1  mrg   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
   3754  1.1  mrg 						  __A,
   3755  1.1  mrg 						  __B,
   3756  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3757  1.1  mrg }
   3758  1.1  mrg 
   3759  1.1  mrg extern __inline __m128h
   3760  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3761  1.1  mrg _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
   3762  1.1  mrg {
   3763  1.1  mrg   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
   3764  1.1  mrg 						  _mm_setzero_ph (),
   3765  1.1  mrg 						  __A,
   3766  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   3767  1.1  mrg }
   3768  1.1  mrg 
   3769  1.1  mrg #ifdef __OPTIMIZE__
   3770  1.1  mrg extern __inline __m128h
   3771  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3772  1.1  mrg _mm512_cvt_roundepu64_ph (__m512i __A, int __B)
   3773  1.1  mrg {
   3774  1.1  mrg   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
   3775  1.1  mrg 						  _mm_setzero_ph (),
   3776  1.1  mrg 						  (__mmask8) -1,
   3777  1.1  mrg 						  __B);
   3778  1.1  mrg }
   3779  1.1  mrg 
   3780  1.1  mrg extern __inline __m128h
   3781  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3782  1.1  mrg _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
   3783  1.1  mrg {
   3784  1.1  mrg   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
   3785  1.1  mrg 						  __A,
   3786  1.1  mrg 						  __B,
   3787  1.1  mrg 						  __D);
   3788  1.1  mrg }
   3789  1.1  mrg 
   3790  1.1  mrg extern __inline __m128h
   3791  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3792  1.1  mrg _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
   3793  1.1  mrg {
   3794  1.1  mrg   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
   3795  1.1  mrg 						  _mm_setzero_ph (),
   3796  1.1  mrg 						  __A,
   3797  1.1  mrg 						  __C);
   3798  1.1  mrg }
   3799  1.1  mrg 
   3800  1.1  mrg #else
   3801  1.1  mrg #define _mm512_cvt_roundepu64_ph(A, B)				\
   3802  1.1  mrg   (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A),	\
   3803  1.1  mrg 					    _mm_setzero_ph (),	\
   3804  1.1  mrg 					    (__mmask8)-1,	\
   3805  1.1  mrg 					    (B)))
   3806  1.1  mrg 
   3807  1.1  mrg #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D)			\
   3808  1.1  mrg   (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
   3809  1.1  mrg 
   3810  1.1  mrg #define _mm512_maskz_cvt_roundepu64_ph(A, B, C)			\
   3811  1.1  mrg   (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B),	\
   3812  1.1  mrg 					    _mm_setzero_ph (),	\
   3813  1.1  mrg 					    (A),		\
   3814  1.1  mrg 					    (C)))
   3815  1.1  mrg 
   3816  1.1  mrg #endif /* __OPTIMIZE__ */
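
/* Editorial usage sketch (illustrative only): the reverse direction,
   packed 64-bit integers to _Float16.  Only eight elements fit in the
   512-bit integer source, so the result is an __m128h.

     __m512i q = _mm512_set1_epi64 (1000);
     __m128h h = _mm512_cvtepi64_ph (q);              // eight halves == 1000.0
     __m128h u = _mm512_maskz_cvtepu64_ph (0x3, q);   // lanes 2..7 zeroed

   With __OPTIMIZE__ the _cvt_round_ forms are inline functions, otherwise
   macros; either way the rounding argument must be a compile-time
   constant.  */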
   3817  1.1  mrg 
   3818  1.1  mrg /* Intrinsics vcvtph2w.  */
   3819  1.1  mrg extern __inline __m512i
   3820  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3821  1.1  mrg _mm512_cvtph_epi16 (__m512h __A)
   3822  1.1  mrg {
   3823  1.1  mrg   return (__m512i)
   3824  1.1  mrg     __builtin_ia32_vcvtph2w512_mask_round (__A,
   3825  1.1  mrg 					      (__v32hi)
   3826  1.1  mrg 					      _mm512_setzero_si512 (),
   3827  1.1  mrg 					      (__mmask32) -1,
   3828  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   3829  1.1  mrg }
   3830  1.1  mrg 
   3831  1.1  mrg extern __inline __m512i
   3832  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3833  1.1  mrg _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
   3834  1.1  mrg {
   3835  1.1  mrg   return (__m512i)
   3836  1.1  mrg     __builtin_ia32_vcvtph2w512_mask_round (__C,
   3837  1.1  mrg 					      (__v32hi) __A,
   3838  1.1  mrg 					      __B,
   3839  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   3840  1.1  mrg }
   3841  1.1  mrg 
   3842  1.1  mrg extern __inline __m512i
   3843  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3844  1.1  mrg _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
   3845  1.1  mrg {
   3846  1.1  mrg   return (__m512i)
   3847  1.1  mrg     __builtin_ia32_vcvtph2w512_mask_round (__B,
   3848  1.1  mrg 					      (__v32hi)
   3849  1.1  mrg 					      _mm512_setzero_si512 (),
   3850  1.1  mrg 					      __A,
   3851  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   3852  1.1  mrg }
   3853  1.1  mrg 
   3854  1.1  mrg #ifdef __OPTIMIZE__
   3855  1.1  mrg extern __inline __m512i
   3856  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3857  1.1  mrg _mm512_cvt_roundph_epi16 (__m512h __A, int __B)
   3858  1.1  mrg {
   3859  1.1  mrg   return (__m512i)
   3860  1.1  mrg     __builtin_ia32_vcvtph2w512_mask_round (__A,
   3861  1.1  mrg 					      (__v32hi)
   3862  1.1  mrg 					      _mm512_setzero_si512 (),
   3863  1.1  mrg 					      (__mmask32) -1,
   3864  1.1  mrg 					      __B);
   3865  1.1  mrg }
   3866  1.1  mrg 
   3867  1.1  mrg extern __inline __m512i
   3868  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3869  1.1  mrg _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
   3870  1.1  mrg {
   3871  1.1  mrg   return (__m512i)
   3872  1.1  mrg     __builtin_ia32_vcvtph2w512_mask_round (__C,
   3873  1.1  mrg 					      (__v32hi) __A,
   3874  1.1  mrg 					      __B,
   3875  1.1  mrg 					      __D);
   3876  1.1  mrg }
   3877  1.1  mrg 
   3878  1.1  mrg extern __inline __m512i
   3879  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3880  1.1  mrg _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
   3881  1.1  mrg {
   3882  1.1  mrg   return (__m512i)
   3883  1.1  mrg     __builtin_ia32_vcvtph2w512_mask_round (__B,
   3884  1.1  mrg 					      (__v32hi)
   3885  1.1  mrg 					      _mm512_setzero_si512 (),
   3886  1.1  mrg 					      __A,
   3887  1.1  mrg 					      __C);
   3888  1.1  mrg }
   3889  1.1  mrg 
   3890  1.1  mrg #else
   3891  1.1  mrg #define _mm512_cvt_roundph_epi16(A, B)					\
   3892  1.1  mrg   ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A),		\
   3893  1.1  mrg 						      (__v32hi)		\
   3894  1.1  mrg 						      _mm512_setzero_si512 (), \
   3895  1.1  mrg 						      (__mmask32)-1,	\
   3896  1.1  mrg 						      (B)))
   3897  1.1  mrg 
   3898  1.1  mrg #define _mm512_mask_cvt_roundph_epi16(A, B, C, D)			\
   3899  1.1  mrg   ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C),		\
   3900  1.1  mrg 						      (__v32hi)(A),	\
   3901  1.1  mrg 						      (B),		\
   3902  1.1  mrg 						      (D)))
   3903  1.1  mrg 
   3904  1.1  mrg #define _mm512_maskz_cvt_roundph_epi16(A, B, C)				\
   3905  1.1  mrg   ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B),		\
   3906  1.1  mrg 						      (__v32hi)		\
   3907  1.1  mrg 						      _mm512_setzero_si512 (), \
   3908  1.1  mrg 						      (A),		\
   3909  1.1  mrg 						      (C)))
   3910  1.1  mrg 
   3911  1.1  mrg #endif /* __OPTIMIZE__ */
   3912  1.1  mrg 
   3913  1.1  mrg /* Intrinsics vcvtph2uw.  */
   3914  1.1  mrg extern __inline __m512i
   3915  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3916  1.1  mrg _mm512_cvtph_epu16 (__m512h __A)
   3917  1.1  mrg {
   3918  1.1  mrg   return (__m512i)
   3919  1.1  mrg     __builtin_ia32_vcvtph2uw512_mask_round (__A,
   3920  1.1  mrg 					       (__v32hi)
   3921  1.1  mrg 					       _mm512_setzero_si512 (),
   3922  1.1  mrg 					       (__mmask32) -1,
   3923  1.1  mrg 					       _MM_FROUND_CUR_DIRECTION);
   3924  1.1  mrg }
   3925  1.1  mrg 
   3926  1.1  mrg extern __inline __m512i
   3927  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3928  1.1  mrg _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
   3929  1.1  mrg {
   3930  1.1  mrg   return (__m512i)
   3931  1.1  mrg     __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
   3932  1.1  mrg 					       _MM_FROUND_CUR_DIRECTION);
   3933  1.1  mrg }
   3934  1.1  mrg 
   3935  1.1  mrg extern __inline __m512i
   3936  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3937  1.1  mrg _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
   3938  1.1  mrg {
   3939  1.1  mrg   return (__m512i)
   3940  1.1  mrg     __builtin_ia32_vcvtph2uw512_mask_round (__B,
   3941  1.1  mrg 					       (__v32hi)
   3942  1.1  mrg 					       _mm512_setzero_si512 (),
   3943  1.1  mrg 					       __A,
   3944  1.1  mrg 					       _MM_FROUND_CUR_DIRECTION);
   3945  1.1  mrg }
   3946  1.1  mrg 
   3947  1.1  mrg #ifdef __OPTIMIZE__
   3948  1.1  mrg extern __inline __m512i
   3949  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3950  1.1  mrg _mm512_cvt_roundph_epu16 (__m512h __A, int __B)
   3951  1.1  mrg {
   3952  1.1  mrg   return (__m512i)
   3953  1.1  mrg     __builtin_ia32_vcvtph2uw512_mask_round (__A,
   3954  1.1  mrg 					       (__v32hi)
   3955  1.1  mrg 					       _mm512_setzero_si512 (),
   3956  1.1  mrg 					       (__mmask32) -1,
   3957  1.1  mrg 					       __B);
   3958  1.1  mrg }
   3959  1.1  mrg 
   3960  1.1  mrg extern __inline __m512i
   3961  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3962  1.1  mrg _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
   3963  1.1  mrg {
   3964  1.1  mrg   return (__m512i)
   3965  1.1  mrg     __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
   3966  1.1  mrg }
   3967  1.1  mrg 
   3968  1.1  mrg extern __inline __m512i
   3969  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   3970  1.1  mrg _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
   3971  1.1  mrg {
   3972  1.1  mrg   return (__m512i)
   3973  1.1  mrg     __builtin_ia32_vcvtph2uw512_mask_round (__B,
   3974  1.1  mrg 					       (__v32hi)
   3975  1.1  mrg 					       _mm512_setzero_si512 (),
   3976  1.1  mrg 					       __A,
   3977  1.1  mrg 					       __C);
   3978  1.1  mrg }
   3979  1.1  mrg 
   3980  1.1  mrg #else
   3981  1.1  mrg #define _mm512_cvt_roundph_epu16(A, B)					\
   3982  1.1  mrg   ((__m512i)								\
   3983  1.1  mrg    __builtin_ia32_vcvtph2uw512_mask_round ((A),			\
   3984  1.1  mrg 					      (__v32hi)			\
   3985  1.1  mrg 					      _mm512_setzero_si512 (),	\
   3986  1.1  mrg 					      (__mmask32)-1, (B)))
   3987  1.1  mrg 
   3988  1.1  mrg #define _mm512_mask_cvt_roundph_epu16(A, B, C, D)			\
   3989  1.1  mrg   ((__m512i)								\
   3990  1.1  mrg    __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
   3991  1.1  mrg 
   3992  1.1  mrg #define _mm512_maskz_cvt_roundph_epu16(A, B, C)				\
   3993  1.1  mrg   ((__m512i)								\
   3994  1.1  mrg    __builtin_ia32_vcvtph2uw512_mask_round ((B),			\
   3995  1.1  mrg 					      (__v32hi)			\
   3996  1.1  mrg 					      _mm512_setzero_si512 (),	\
   3997  1.1  mrg 					      (A),			\
   3998  1.1  mrg 					      (C)))
   3999  1.1  mrg 
   4000  1.1  mrg #endif /* __OPTIMIZE__ */
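
/* Editorial usage sketch (illustrative only): packed _Float16 to 16-bit
   integers using the current or an explicit rounding mode.  All 32
   elements of an __m512h participate, hence the __mmask32 mask type.

     __m512h h = _mm512_set1_ph ((_Float16) 2.5f);
     __m512i w = _mm512_cvtph_epi16 (h);              // 2 (round to even)
     __m512i u = _mm512_cvt_roundph_epu16 (h, _MM_FROUND_TO_POS_INF
					      | _MM_FROUND_NO_EXC);  // 3
  */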
   4001  1.1  mrg 
   4002  1.1  mrg /* Intrinsics vcvttph2w.  */
   4003  1.1  mrg extern __inline __m512i
   4004  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4005  1.1  mrg _mm512_cvttph_epi16 (__m512h __A)
   4006  1.1  mrg {
   4007  1.1  mrg   return (__m512i)
   4008  1.1  mrg     __builtin_ia32_vcvttph2w512_mask_round (__A,
   4009  1.1  mrg 					    (__v32hi)
   4010  1.1  mrg 					    _mm512_setzero_si512 (),
   4011  1.1  mrg 					    (__mmask32) -1,
   4012  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION);
   4013  1.1  mrg }
   4014  1.1  mrg 
   4015  1.1  mrg extern __inline __m512i
   4016  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4017  1.1  mrg _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
   4018  1.1  mrg {
   4019  1.1  mrg   return (__m512i)
   4020  1.1  mrg     __builtin_ia32_vcvttph2w512_mask_round (__C,
   4021  1.1  mrg 					    (__v32hi) __A,
   4022  1.1  mrg 					    __B,
   4023  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION);
   4024  1.1  mrg }
   4025  1.1  mrg 
   4026  1.1  mrg extern __inline __m512i
   4027  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4028  1.1  mrg _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
   4029  1.1  mrg {
   4030  1.1  mrg   return (__m512i)
   4031  1.1  mrg     __builtin_ia32_vcvttph2w512_mask_round (__B,
   4032  1.1  mrg 					    (__v32hi)
   4033  1.1  mrg 					    _mm512_setzero_si512 (),
   4034  1.1  mrg 					    __A,
   4035  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION);
   4036  1.1  mrg }
   4037  1.1  mrg 
   4038  1.1  mrg #ifdef __OPTIMIZE__
   4039  1.1  mrg extern __inline __m512i
   4040  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4041  1.1  mrg _mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
   4042  1.1  mrg {
   4043  1.1  mrg   return (__m512i)
   4044  1.1  mrg     __builtin_ia32_vcvttph2w512_mask_round (__A,
   4045  1.1  mrg 					    (__v32hi)
   4046  1.1  mrg 					    _mm512_setzero_si512 (),
   4047  1.1  mrg 					    (__mmask32) -1,
   4048  1.1  mrg 					    __B);
   4049  1.1  mrg }
   4050  1.1  mrg 
   4051  1.1  mrg extern __inline __m512i
   4052  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4053  1.1  mrg _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
   4054  1.1  mrg 				__m512h __C, int __D)
   4055  1.1  mrg {
   4056  1.1  mrg   return (__m512i)
   4057  1.1  mrg     __builtin_ia32_vcvttph2w512_mask_round (__C,
   4058  1.1  mrg 					    (__v32hi) __A,
   4059  1.1  mrg 					    __B,
   4060  1.1  mrg 					    __D);
   4061  1.1  mrg }
   4062  1.1  mrg 
   4063  1.1  mrg extern __inline __m512i
   4064  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4065  1.1  mrg _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
   4066  1.1  mrg {
   4067  1.1  mrg   return (__m512i)
   4068  1.1  mrg     __builtin_ia32_vcvttph2w512_mask_round (__B,
   4069  1.1  mrg 					    (__v32hi)
   4070  1.1  mrg 					    _mm512_setzero_si512 (),
   4071  1.1  mrg 					    __A,
   4072  1.1  mrg 					    __C);
   4073  1.1  mrg }
   4074  1.1  mrg 
   4075  1.1  mrg #else
   4076  1.1  mrg #define _mm512_cvtt_roundph_epi16(A, B)				    \
   4077  1.1  mrg   ((__m512i)							    \
   4078  1.1  mrg    __builtin_ia32_vcvttph2w512_mask_round ((A),			    \
   4079  1.1  mrg 					   (__v32hi)		    \
   4080  1.1  mrg 					   _mm512_setzero_si512 (), \
   4081  1.1  mrg 					   (__mmask32)-1,	    \
   4082  1.1  mrg 					   (B)))
   4083  1.1  mrg 
   4084  1.1  mrg #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D)		\
   4085  1.1  mrg   ((__m512i)							\
   4086  1.1  mrg    __builtin_ia32_vcvttph2w512_mask_round ((C),			\
   4087  1.1  mrg 					   (__v32hi)(A),	\
   4088  1.1  mrg 					   (B),			\
   4089  1.1  mrg 					   (D)))
   4090  1.1  mrg 
   4091  1.1  mrg #define _mm512_maskz_cvtt_roundph_epi16(A, B, C)		    \
   4092  1.1  mrg   ((__m512i)							    \
   4093  1.1  mrg    __builtin_ia32_vcvttph2w512_mask_round ((B),			    \
   4094  1.1  mrg 					   (__v32hi)		    \
   4095  1.1  mrg 					   _mm512_setzero_si512 (), \
   4096  1.1  mrg 					   (A),			    \
   4097  1.1  mrg 					   (C)))
   4098  1.1  mrg 
   4099  1.1  mrg #endif /* __OPTIMIZE__ */
   4100  1.1  mrg 
   4101  1.1  mrg /* Intrinsics vcvttph2uw.  */
   4102  1.1  mrg extern __inline __m512i
   4103  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4104  1.1  mrg _mm512_cvttph_epu16 (__m512h __A)
   4105  1.1  mrg {
   4106  1.1  mrg   return (__m512i)
   4107  1.1  mrg     __builtin_ia32_vcvttph2uw512_mask_round (__A,
   4108  1.1  mrg 					     (__v32hi)
   4109  1.1  mrg 					     _mm512_setzero_si512 (),
   4110  1.1  mrg 					     (__mmask32) -1,
   4111  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   4112  1.1  mrg }
   4113  1.1  mrg 
   4114  1.1  mrg extern __inline __m512i
   4115  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4116  1.1  mrg _mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
   4117  1.1  mrg {
   4118  1.1  mrg   return (__m512i)
   4119  1.1  mrg     __builtin_ia32_vcvttph2uw512_mask_round (__C,
   4120  1.1  mrg 					     (__v32hi) __A,
   4121  1.1  mrg 					     __B,
   4122  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   4123  1.1  mrg }
   4124  1.1  mrg 
   4125  1.1  mrg extern __inline __m512i
   4126  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4127  1.1  mrg _mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
   4128  1.1  mrg {
   4129  1.1  mrg   return (__m512i)
   4130  1.1  mrg     __builtin_ia32_vcvttph2uw512_mask_round (__B,
   4131  1.1  mrg 					     (__v32hi)
   4132  1.1  mrg 					     _mm512_setzero_si512 (),
   4133  1.1  mrg 					     __A,
   4134  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   4135  1.1  mrg }
   4136  1.1  mrg 
   4137  1.1  mrg #ifdef __OPTIMIZE__
   4138  1.1  mrg extern __inline __m512i
   4139  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4140  1.1  mrg _mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
   4141  1.1  mrg {
   4142  1.1  mrg   return (__m512i)
   4143  1.1  mrg     __builtin_ia32_vcvttph2uw512_mask_round (__A,
   4144  1.1  mrg 					     (__v32hi)
   4145  1.1  mrg 					     _mm512_setzero_si512 (),
   4146  1.1  mrg 					     (__mmask32) -1,
   4147  1.1  mrg 					     __B);
   4148  1.1  mrg }
   4149  1.1  mrg 
   4150  1.1  mrg extern __inline __m512i
   4151  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4152  1.1  mrg _mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
   4153  1.1  mrg 				__m512h __C, int __D)
   4154  1.1  mrg {
   4155  1.1  mrg   return (__m512i)
   4156  1.1  mrg     __builtin_ia32_vcvttph2uw512_mask_round (__C,
   4157  1.1  mrg 					     (__v32hi) __A,
   4158  1.1  mrg 					     __B,
   4159  1.1  mrg 					     __D);
   4160  1.1  mrg }
   4161  1.1  mrg 
   4162  1.1  mrg extern __inline __m512i
   4163  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4164  1.1  mrg _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
   4165  1.1  mrg {
   4166  1.1  mrg   return (__m512i)
   4167  1.1  mrg     __builtin_ia32_vcvttph2uw512_mask_round (__B,
   4168  1.1  mrg 					     (__v32hi)
   4169  1.1  mrg 					     _mm512_setzero_si512 (),
   4170  1.1  mrg 					     __A,
   4171  1.1  mrg 					     __C);
   4172  1.1  mrg }
   4173  1.1  mrg 
   4174  1.1  mrg #else
   4175  1.1  mrg #define _mm512_cvtt_roundph_epu16(A, B)				     \
   4176  1.1  mrg   ((__m512i)							     \
   4177  1.1  mrg    __builtin_ia32_vcvttph2uw512_mask_round ((A),		     \
   4178  1.1  mrg 					    (__v32hi)		     \
   4179  1.1  mrg 					    _mm512_setzero_si512 (), \
   4180  1.1  mrg 					    (__mmask32)-1,	     \
   4181  1.1  mrg 					    (B)))
   4182  1.1  mrg 
   4183  1.1  mrg #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D)		\
   4184  1.1  mrg   ((__m512i)							\
   4185  1.1  mrg    __builtin_ia32_vcvttph2uw512_mask_round ((C),		\
   4186  1.1  mrg 					    (__v32hi)(A),	\
   4187  1.1  mrg 					    (B),		\
   4188  1.1  mrg 					    (D)))
   4189  1.1  mrg 
   4190  1.1  mrg #define _mm512_maskz_cvtt_roundph_epu16(A, B, C)		     \
   4191  1.1  mrg   ((__m512i)							     \
   4192  1.1  mrg    __builtin_ia32_vcvttph2uw512_mask_round ((B),		     \
   4193  1.1  mrg 					    (__v32hi)		     \
   4194  1.1  mrg 					    _mm512_setzero_si512 (), \
   4195  1.1  mrg 					    (A),		     \
   4196  1.1  mrg 					    (C)))
   4197  1.1  mrg 
   4198  1.1  mrg #endif /* __OPTIMIZE__ */
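
/* Editorial usage sketch (illustrative only): the truncating
   counterparts ignore the rounding mode and chop toward zero, matching
   a C cast; the rounding argument of the _cvtt_round_ forms is only
   useful for passing _MM_FROUND_NO_EXC.

     __m512h h = _mm512_set1_ph ((_Float16) -2.9f);
     __m512i w = _mm512_cvttph_epi16 (h);             // every lane == -2
  */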
   4199  1.1  mrg 
   4200  1.1  mrg /* Intrinsics vcvtw2ph.  */
   4201  1.1  mrg extern __inline __m512h
   4202  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4203  1.1  mrg _mm512_cvtepi16_ph (__m512i __A)
   4204  1.1  mrg {
   4205  1.1  mrg   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
   4206  1.1  mrg 						_mm512_setzero_ph (),
   4207  1.1  mrg 						(__mmask32) -1,
   4208  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   4209  1.1  mrg }
   4210  1.1  mrg 
   4211  1.1  mrg extern __inline __m512h
   4212  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4213  1.1  mrg _mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
   4214  1.1  mrg {
   4215  1.1  mrg   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
   4216  1.1  mrg 						__A,
   4217  1.1  mrg 						__B,
   4218  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   4219  1.1  mrg }
   4220  1.1  mrg 
   4221  1.1  mrg extern __inline __m512h
   4222  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4223  1.1  mrg _mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
   4224  1.1  mrg {
   4225  1.1  mrg   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
   4226  1.1  mrg 						_mm512_setzero_ph (),
   4227  1.1  mrg 						__A,
   4228  1.1  mrg 						_MM_FROUND_CUR_DIRECTION);
   4229  1.1  mrg }
   4230  1.1  mrg 
   4231  1.1  mrg #ifdef __OPTIMIZE__
   4232  1.1  mrg extern __inline __m512h
   4233  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4234  1.1  mrg _mm512_cvt_roundepi16_ph (__m512i __A, int __B)
   4235  1.1  mrg {
   4236  1.1  mrg   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
   4237  1.1  mrg 						_mm512_setzero_ph (),
   4238  1.1  mrg 						(__mmask32) -1,
   4239  1.1  mrg 						__B);
   4240  1.1  mrg }
   4241  1.1  mrg 
   4242  1.1  mrg extern __inline __m512h
   4243  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4244  1.1  mrg _mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
   4245  1.1  mrg {
   4246  1.1  mrg   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
   4247  1.1  mrg 						__A,
   4248  1.1  mrg 						__B,
   4249  1.1  mrg 						__D);
   4250  1.1  mrg }
   4251  1.1  mrg 
   4252  1.1  mrg extern __inline __m512h
   4253  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4254  1.1  mrg _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
   4255  1.1  mrg {
   4256  1.1  mrg   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
   4257  1.1  mrg 						_mm512_setzero_ph (),
   4258  1.1  mrg 						__A,
   4259  1.1  mrg 						__C);
   4260  1.1  mrg }
   4261  1.1  mrg 
   4262  1.1  mrg #else
   4263  1.1  mrg #define _mm512_cvt_roundepi16_ph(A, B)				\
   4264  1.1  mrg   (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A),		\
   4265  1.1  mrg 					  _mm512_setzero_ph (),	\
   4266  1.1  mrg 					  (__mmask32)-1,	\
   4267  1.1  mrg 					  (B)))
   4268  1.1  mrg 
   4269  1.1  mrg #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D)	\
   4270  1.1  mrg   (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C),	\
   4271  1.1  mrg 					  (A),		\
   4272  1.1  mrg 					  (B),		\
   4273  1.1  mrg 					  (D)))
   4274  1.1  mrg 
   4275  1.1  mrg #define _mm512_maskz_cvt_roundepi16_ph(A, B, C)			\
   4276  1.1  mrg   (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B),		\
   4277  1.1  mrg 					  _mm512_setzero_ph (),	\
   4278  1.1  mrg 					  (A),			\
   4279  1.1  mrg 					  (C)))
   4280  1.1  mrg 
   4281  1.1  mrg #endif /* __OPTIMIZE__ */
   4282  1.1  mrg 
   4283  1.1  mrg /* Intrinsics vcvtuw2ph.  */
   4284  1.1  mrg extern __inline __m512h
   4285  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4286  1.1  mrg _mm512_cvtepu16_ph (__m512i __A)
   4287  1.1  mrg {
   4288  1.1  mrg   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
   4289  1.1  mrg 						 _mm512_setzero_ph (),
   4290  1.1  mrg 						 (__mmask32) -1,
   4291  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4292  1.1  mrg }
   4293  1.1  mrg 
   4294  1.1  mrg extern __inline __m512h
   4295  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4296  1.1  mrg _mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
   4297  1.1  mrg {
   4298  1.1  mrg   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
   4299  1.1  mrg 						 __A,
   4300  1.1  mrg 						 __B,
   4301  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4302  1.1  mrg }
   4303  1.1  mrg 
   4304  1.1  mrg extern __inline __m512h
   4305  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4306  1.1  mrg _mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
   4307  1.1  mrg {
   4308  1.1  mrg   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
   4309  1.1  mrg 						 _mm512_setzero_ph (),
   4310  1.1  mrg 						 __A,
   4311  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4312  1.1  mrg }
   4313  1.1  mrg 
   4314  1.1  mrg #ifdef __OPTIMIZE__
   4315  1.1  mrg extern __inline __m512h
   4316  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4317  1.1  mrg _mm512_cvt_roundepu16_ph (__m512i __A, int __B)
   4318  1.1  mrg {
   4319  1.1  mrg   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
   4320  1.1  mrg 						 _mm512_setzero_ph (),
   4321  1.1  mrg 						 (__mmask32) -1,
   4322  1.1  mrg 						 __B);
   4323  1.1  mrg }
   4324  1.1  mrg 
   4325  1.1  mrg extern __inline __m512h
   4326  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4327  1.1  mrg _mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
   4328  1.1  mrg {
   4329  1.1  mrg   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
   4330  1.1  mrg 						 __A,
   4331  1.1  mrg 						 __B,
   4332  1.1  mrg 						 __D);
   4333  1.1  mrg }
   4334  1.1  mrg 
   4335  1.1  mrg extern __inline __m512h
   4336  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4337  1.1  mrg _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
   4338  1.1  mrg {
   4339  1.1  mrg   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
   4340  1.1  mrg 						 _mm512_setzero_ph (),
   4341  1.1  mrg 						 __A,
   4342  1.1  mrg 						 __C);
   4343  1.1  mrg }
   4344  1.1  mrg 
   4345  1.1  mrg #else
   4346  1.1  mrg #define _mm512_cvt_roundepu16_ph(A, B)					\
   4347  1.1  mrg   (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A),		\
   4348  1.1  mrg 					   _mm512_setzero_ph (),	\
   4349  1.1  mrg 					   (__mmask32)-1,		\
   4350  1.1  mrg 					   (B)))
   4351  1.1  mrg 
   4352  1.1  mrg #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D)		\
   4353  1.1  mrg   (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C),	\
   4354  1.1  mrg 					   (A),			\
   4355  1.1  mrg 					   (B),			\
   4356  1.1  mrg 					   (D)))
   4357  1.1  mrg 
   4358  1.1  mrg #define _mm512_maskz_cvt_roundepu16_ph(A, B, C)				\
   4359  1.1  mrg   (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B),		\
   4360  1.1  mrg 					   _mm512_setzero_ph (),	\
   4361  1.1  mrg 					   (A),				\
   4362  1.1  mrg 					   (C)))
   4363  1.1  mrg 
   4364  1.1  mrg #endif /* __OPTIMIZE__ */
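
/* Editorial usage sketch (illustrative only): widening 16-bit integers
   to _Float16.  Integers of magnitude up to 2048 convert exactly;
   larger values may be rounded, since _Float16 has an 11-bit
   significand.

     __m512i w = _mm512_set1_epi16 (1234);
     __m512h h = _mm512_cvtepi16_ph (w);                // exact: 1234.0
     __m512h u = _mm512_maskz_cvtepu16_ph (0xffff, w);  // lanes 16..31 zeroed
  */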
   4365  1.1  mrg 
   4366  1.1  mrg /* Intrinsics vcvtsh2si, vcvtsh2usi.  */
   4367  1.1  mrg extern __inline int
   4368  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4369  1.1  mrg _mm_cvtsh_i32 (__m128h __A)
   4370  1.1  mrg {
   4371  1.1  mrg   return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
   4372  1.1  mrg }
   4373  1.1  mrg 
   4374  1.1  mrg extern __inline unsigned
   4375  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4376  1.1  mrg _mm_cvtsh_u32 (__m128h __A)
   4377  1.1  mrg {
   4378  1.1  mrg   return (unsigned)
   4379  1.1  mrg     __builtin_ia32_vcvtsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
   4380  1.1  mrg }
   4381  1.1  mrg 
   4382  1.1  mrg #ifdef __OPTIMIZE__
   4383  1.1  mrg extern __inline int
   4384  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4385  1.1  mrg _mm_cvt_roundsh_i32 (__m128h __A, const int __R)
   4386  1.1  mrg {
   4387  1.1  mrg   return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
   4388  1.1  mrg }
   4389  1.1  mrg 
   4390  1.1  mrg extern __inline unsigned
   4391  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4392  1.1  mrg _mm_cvt_roundsh_u32 (__m128h __A, const int __R)
   4393  1.1  mrg {
   4394  1.1  mrg   return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
   4395  1.1  mrg }
   4396  1.1  mrg 
   4397  1.1  mrg #else
   4398  1.1  mrg #define _mm_cvt_roundsh_i32(A, B)		\
   4399  1.1  mrg   ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
   4400  1.1  mrg #define _mm_cvt_roundsh_u32(A, B)		\
   4401  1.1  mrg   ((unsigned)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
   4402  1.1  mrg 
   4403  1.1  mrg #endif /* __OPTIMIZE__ */
   4404  1.1  mrg 
   4405  1.1  mrg #ifdef __x86_64__
   4406  1.1  mrg extern __inline long long
   4407  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4408  1.1  mrg _mm_cvtsh_i64 (__m128h __A)
   4409  1.1  mrg {
   4410  1.1  mrg   return (long long)
   4411  1.1  mrg     __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
   4412  1.1  mrg }
   4413  1.1  mrg 
   4414  1.1  mrg extern __inline unsigned long long
   4415  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4416  1.1  mrg _mm_cvtsh_u64 (__m128h __A)
   4417  1.1  mrg {
   4418  1.1  mrg   return (unsigned long long)
   4419  1.1  mrg     __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
   4420  1.1  mrg }
   4421  1.1  mrg 
   4422  1.1  mrg #ifdef __OPTIMIZE__
   4423  1.1  mrg extern __inline long long
   4424  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4425  1.1  mrg _mm_cvt_roundsh_i64 (__m128h __A, const int __R)
   4426  1.1  mrg {
   4427  1.1  mrg   return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
   4428  1.1  mrg }
   4429  1.1  mrg 
   4430  1.1  mrg extern __inline unsigned long long
   4431  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4432  1.1  mrg _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
   4433  1.1  mrg {
   4434  1.1  mrg   return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
   4435  1.1  mrg }
   4436  1.1  mrg 
   4437  1.1  mrg #else
   4438  1.1  mrg #define _mm_cvt_roundsh_i64(A, B)			\
   4439  1.1  mrg   ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
   4440  1.1  mrg #define _mm_cvt_roundsh_u64(A, B)			\
   4441  1.1  mrg   ((unsigned long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
   4442  1.1  mrg 
   4443  1.1  mrg #endif /* __OPTIMIZE__ */
   4444  1.1  mrg #endif /* __x86_64__ */
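
/* Editorial usage sketch (illustrative only): scalar conversion of the
   lowest half-precision element, using the current or an explicitly
   supplied rounding mode.  The 64-bit forms exist only when __x86_64__
   is defined.

     __m128h s = _mm_set1_ph ((_Float16) 3.5f);
     int i = _mm_cvtsh_i32 (s);                          // 4 (round to even)
     int j = _mm_cvt_roundsh_i32 (s, _MM_FROUND_TO_ZERO
				     | _MM_FROUND_NO_EXC);  // 3
  */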
   4445  1.1  mrg 
   4446  1.1  mrg /* Intrinsics vcvttsh2si, vcvttsh2usi.  */
   4447  1.1  mrg extern __inline int
   4448  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4449  1.1  mrg _mm_cvttsh_i32 (__m128h __A)
   4450  1.1  mrg {
   4451  1.1  mrg   return (int)
   4452  1.1  mrg     __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
   4453  1.1  mrg }
   4454  1.1  mrg 
   4455  1.1  mrg extern __inline unsigned
   4456  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4457  1.1  mrg _mm_cvttsh_u32 (__m128h __A)
   4458  1.1  mrg {
   4459  1.1  mrg   return (unsigned)
   4460  1.1  mrg     __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
   4461  1.1  mrg }
   4462  1.1  mrg 
   4463  1.1  mrg #ifdef __OPTIMIZE__
   4464  1.1  mrg extern __inline int
   4465  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4466  1.1  mrg _mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
   4467  1.1  mrg {
   4468  1.1  mrg   return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
   4469  1.1  mrg }
   4470  1.1  mrg 
   4471  1.1  mrg extern __inline unsigned
   4472  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4473  1.1  mrg _mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
   4474  1.1  mrg {
   4475  1.1  mrg   return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
   4476  1.1  mrg }
   4477  1.1  mrg 
   4478  1.1  mrg #else
   4479  1.1  mrg #define _mm_cvtt_roundsh_i32(A, B)		\
   4480  1.1  mrg   ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
   4481  1.1  mrg #define _mm_cvtt_roundsh_u32(A, B)		\
   4482  1.1  mrg   ((unsigned)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
   4483  1.1  mrg 
   4484  1.1  mrg #endif /* __OPTIMIZE__ */
   4485  1.1  mrg 
   4486  1.1  mrg #ifdef __x86_64__
   4487  1.1  mrg extern __inline long long
   4488  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4489  1.1  mrg _mm_cvttsh_i64 (__m128h __A)
   4490  1.1  mrg {
   4491  1.1  mrg   return (long long)
   4492  1.1  mrg     __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
   4493  1.1  mrg }
   4494  1.1  mrg 
   4495  1.1  mrg extern __inline unsigned long long
   4496  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4497  1.1  mrg _mm_cvttsh_u64 (__m128h __A)
   4498  1.1  mrg {
   4499  1.1  mrg   return (unsigned long long)
   4500  1.1  mrg     __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
   4501  1.1  mrg }
   4502  1.1  mrg 
   4503  1.1  mrg #ifdef __OPTIMIZE__
   4504  1.1  mrg extern __inline long long
   4505  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4506  1.1  mrg _mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
   4507  1.1  mrg {
   4508  1.1  mrg   return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
   4509  1.1  mrg }
   4510  1.1  mrg 
   4511  1.1  mrg extern __inline unsigned long long
   4512  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4513  1.1  mrg _mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
   4514  1.1  mrg {
   4515  1.1  mrg   return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
   4516  1.1  mrg }
   4517  1.1  mrg 
   4518  1.1  mrg #else
   4519  1.1  mrg #define _mm_cvtt_roundsh_i64(A, B)			\
   4520  1.1  mrg   ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
   4521  1.1  mrg #define _mm_cvtt_roundsh_u64(A, B)			\
   4522  1.1  mrg   ((unsigned long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
   4523  1.1  mrg 
   4524  1.1  mrg #endif /* __OPTIMIZE__ */
   4525  1.1  mrg #endif /* __x86_64__ */
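
/* Editorial usage sketch (illustrative only): the truncating scalar
   forms always chop toward zero; their rounding argument is only useful
   for passing _MM_FROUND_NO_EXC.

     __m128h s = _mm_set1_ph ((_Float16) 3.7f);
     int       i = _mm_cvttsh_i32 (s);          // 3
     long long l = _mm_cvttsh_i64 (s);          // 3, x86-64 only
  */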
   4526  1.1  mrg 
   4527  1.1  mrg /* Intrinsics vcvtsi2sh, vcvtusi2sh.  */
   4528  1.1  mrg extern __inline __m128h
   4529  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4530  1.1  mrg _mm_cvti32_sh (__m128h __A, int __B)
   4531  1.1  mrg {
   4532  1.1  mrg   return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
   4533  1.1  mrg }
   4534  1.1  mrg 
   4535  1.1  mrg extern __inline __m128h
   4536  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4537  1.1  mrg _mm_cvtu32_sh (__m128h __A, unsigned int __B)
   4538  1.1  mrg {
   4539  1.1  mrg   return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
   4540  1.1  mrg }
   4541  1.1  mrg 
   4542  1.1  mrg #ifdef __OPTIMIZE__
   4543  1.1  mrg extern __inline __m128h
   4544  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4545  1.1  mrg _mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
   4546  1.1  mrg {
   4547  1.1  mrg   return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
   4548  1.1  mrg }
   4549  1.1  mrg 
   4550  1.1  mrg extern __inline __m128h
   4551  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4552  1.1  mrg _mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
   4553  1.1  mrg {
   4554  1.1  mrg   return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
   4555  1.1  mrg }
   4556  1.1  mrg 
   4557  1.1  mrg #else
   4558  1.1  mrg #define _mm_cvt_roundi32_sh(A, B, C)		\
   4559  1.1  mrg   (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
   4560  1.1  mrg #define _mm_cvt_roundu32_sh(A, B, C)		\
   4561  1.1  mrg   (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
   4562  1.1  mrg 
   4563  1.1  mrg #endif /* __OPTIMIZE__ */
   4564  1.1  mrg 
   4565  1.1  mrg #ifdef __x86_64__
   4566  1.1  mrg extern __inline __m128h
   4567  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4568  1.1  mrg _mm_cvti64_sh (__m128h __A, long long __B)
   4569  1.1  mrg {
   4570  1.1  mrg   return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
   4571  1.1  mrg }
   4572  1.1  mrg 
   4573  1.1  mrg extern __inline __m128h
   4574  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4575  1.1  mrg _mm_cvtu64_sh (__m128h __A, unsigned long long __B)
   4576  1.1  mrg {
   4577  1.1  mrg   return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
   4578  1.1  mrg }
   4579  1.1  mrg 
   4580  1.1  mrg #ifdef __OPTIMIZE__
   4581  1.1  mrg extern __inline __m128h
   4582  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4583  1.1  mrg _mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
   4584  1.1  mrg {
   4585  1.1  mrg   return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
   4586  1.1  mrg }
   4587  1.1  mrg 
   4588  1.1  mrg extern __inline __m128h
   4589  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4590  1.1  mrg _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
   4591  1.1  mrg {
   4592  1.1  mrg   return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
   4593  1.1  mrg }
   4594  1.1  mrg 
   4595  1.1  mrg #else
   4596  1.1  mrg #define _mm_cvt_roundi64_sh(A, B, C)		\
   4597  1.1  mrg   (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
   4598  1.1  mrg #define _mm_cvt_roundu64_sh(A, B, C)		\
   4599  1.1  mrg   (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
   4600  1.1  mrg 
   4601  1.1  mrg #endif /* __OPTIMIZE__ */
   4602  1.1  mrg #endif /* __x86_64__ */
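
/* Editorial usage sketch (illustrative only): the reverse scalar
   direction.  The integer is converted to _Float16 and written to
   element 0; elements 1..7 are copied from the first operand.

     __m128h upper = _mm_set1_ph ((_Float16) 1.0f);
     __m128h r = _mm_cvti32_sh (upper, 42);      // { 42.0, 1.0, ..., 1.0 }
     __m128h u = _mm_cvtu32_sh (upper, 42u);
  */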
   4603  1.1  mrg 
   4604  1.1  mrg /* Intrinsics vcvtph2pd.  */
   4605  1.1  mrg extern __inline __m512d
   4606  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4607  1.1  mrg _mm512_cvtph_pd (__m128h __A)
   4608  1.1  mrg {
   4609  1.1  mrg   return __builtin_ia32_vcvtph2pd512_mask_round (__A,
   4610  1.1  mrg 						 _mm512_setzero_pd (),
   4611  1.1  mrg 						 (__mmask8) -1,
   4612  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4613  1.1  mrg }
   4614  1.1  mrg 
   4615  1.1  mrg extern __inline __m512d
   4616  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4617  1.1  mrg _mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
   4618  1.1  mrg {
   4619  1.1  mrg   return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
   4620  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4621  1.1  mrg }
   4622  1.1  mrg 
   4623  1.1  mrg extern __inline __m512d
   4624  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4625  1.1  mrg _mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
   4626  1.1  mrg {
   4627  1.1  mrg   return __builtin_ia32_vcvtph2pd512_mask_round (__B,
   4628  1.1  mrg 						 _mm512_setzero_pd (),
   4629  1.1  mrg 						 __A,
   4630  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4631  1.1  mrg }
   4632  1.1  mrg 
   4633  1.1  mrg #ifdef __OPTIMIZE__
   4634  1.1  mrg extern __inline __m512d
   4635  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4636  1.1  mrg _mm512_cvt_roundph_pd (__m128h __A, int __B)
   4637  1.1  mrg {
   4638  1.1  mrg   return __builtin_ia32_vcvtph2pd512_mask_round (__A,
   4639  1.1  mrg 						 _mm512_setzero_pd (),
   4640  1.1  mrg 						 (__mmask8) -1,
   4641  1.1  mrg 						 __B);
   4642  1.1  mrg }
   4643  1.1  mrg 
   4644  1.1  mrg extern __inline __m512d
   4645  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4646  1.1  mrg _mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
   4647  1.1  mrg {
   4648  1.1  mrg   return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
   4649  1.1  mrg }
   4650  1.1  mrg 
   4651  1.1  mrg extern __inline __m512d
   4652  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4653  1.1  mrg _mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
   4654  1.1  mrg {
   4655  1.1  mrg   return __builtin_ia32_vcvtph2pd512_mask_round (__B,
   4656  1.1  mrg 						 _mm512_setzero_pd (),
   4657  1.1  mrg 						 __A,
   4658  1.1  mrg 						 __C);
   4659  1.1  mrg }
   4660  1.1  mrg 
   4661  1.1  mrg #else
   4662  1.1  mrg #define _mm512_cvt_roundph_pd(A, B)					\
   4663  1.1  mrg   (__builtin_ia32_vcvtph2pd512_mask_round ((A),			\
   4664  1.1  mrg 					   _mm512_setzero_pd (),	\
   4665  1.1  mrg 					   (__mmask8)-1,		\
   4666  1.1  mrg 					   (B)))
   4667  1.1  mrg 
   4668  1.1  mrg #define _mm512_mask_cvt_roundph_pd(A, B, C, D)				\
   4669  1.1  mrg   (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
   4670  1.1  mrg 
   4671  1.1  mrg #define _mm512_maskz_cvt_roundph_pd(A, B, C)				\
   4672  1.1  mrg   (__builtin_ia32_vcvtph2pd512_mask_round ((B),			\
   4673  1.1  mrg 					   _mm512_setzero_pd (),	\
   4674  1.1  mrg 					   (A),			\
   4675  1.1  mrg 					   (C)))
   4676  1.1  mrg 
   4677  1.1  mrg #endif /* __OPTIMIZE__ */
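
/* Editorial usage sketch (illustrative only): widening the eight lowest
   _Float16 elements to double precision.  The conversion is always
   exact, so the rounding argument of the _cvt_round_ form matters only
   for exception suppression.

     __m128h h = _mm_set1_ph ((_Float16) 0.5f);
     __m512d d = _mm512_cvtph_pd (h);            // eight doubles == 0.5
  */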
   4678  1.1  mrg 
   4679  1.1  mrg /* Intrinsics vcvtph2psx.  */
   4680  1.1  mrg extern __inline __m512
   4681  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4682  1.1  mrg _mm512_cvtxph_ps (__m256h __A)
   4683  1.1  mrg {
   4684  1.1  mrg   return __builtin_ia32_vcvtph2psx512_mask_round (__A,
   4685  1.1  mrg 						  _mm512_setzero_ps (),
   4686  1.1  mrg 						  (__mmask16) -1,
   4687  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   4688  1.1  mrg }
   4689  1.1  mrg 
   4690  1.1  mrg extern __inline __m512
   4691  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4692  1.1  mrg _mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
   4693  1.1  mrg {
   4694  1.1  mrg   return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
   4695  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   4696  1.1  mrg }
   4697  1.1  mrg 
   4698  1.1  mrg extern __inline __m512
   4699  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4700  1.1  mrg _mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
   4701  1.1  mrg {
   4702  1.1  mrg   return __builtin_ia32_vcvtph2psx512_mask_round (__B,
   4703  1.1  mrg 						  _mm512_setzero_ps (),
   4704  1.1  mrg 						  __A,
   4705  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   4706  1.1  mrg }
   4707  1.1  mrg 
   4708  1.1  mrg #ifdef __OPTIMIZE__
   4709  1.1  mrg extern __inline __m512
   4710  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4711  1.1  mrg _mm512_cvtx_roundph_ps (__m256h __A, int __B)
   4712  1.1  mrg {
   4713  1.1  mrg   return __builtin_ia32_vcvtph2psx512_mask_round (__A,
   4714  1.1  mrg 						  _mm512_setzero_ps (),
   4715  1.1  mrg 						  (__mmask16) -1,
   4716  1.1  mrg 						  __B);
   4717  1.1  mrg }
   4718  1.1  mrg 
   4719  1.1  mrg extern __inline __m512
   4720  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4721  1.1  mrg _mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
   4722  1.1  mrg {
   4723  1.1  mrg   return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
   4724  1.1  mrg }
   4725  1.1  mrg 
   4726  1.1  mrg extern __inline __m512
   4727  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4728  1.1  mrg _mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
   4729  1.1  mrg {
   4730  1.1  mrg   return __builtin_ia32_vcvtph2psx512_mask_round (__B,
   4731  1.1  mrg 						  _mm512_setzero_ps (),
   4732  1.1  mrg 						  __A,
   4733  1.1  mrg 						  __C);
   4734  1.1  mrg }
   4735  1.1  mrg 
   4736  1.1  mrg #else
   4737  1.1  mrg #define _mm512_cvtx_roundph_ps(A, B)					\
   4738  1.1  mrg   (__builtin_ia32_vcvtph2psx512_mask_round ((A),			\
   4739  1.1  mrg 					    _mm512_setzero_ps (),	\
   4740  1.1  mrg 					    (__mmask16)-1,		\
   4741  1.1  mrg 					    (B)))
   4742  1.1  mrg 
   4743  1.1  mrg #define _mm512_mask_cvtx_roundph_ps(A, B, C, D)				\
   4744  1.1  mrg   (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
   4745  1.1  mrg 
   4746  1.1  mrg #define _mm512_maskz_cvtx_roundph_ps(A, B, C)				\
   4747  1.1  mrg   (__builtin_ia32_vcvtph2psx512_mask_round ((B),			\
   4748  1.1  mrg 					    _mm512_setzero_ps (),	\
   4749  1.1  mrg 					    (A),			\
   4750  1.1  mrg 					    (C)))
   4751  1.1  mrg #endif /* __OPTIMIZE__ */
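
/* Editorial usage sketch (illustrative only): the "x" spelling marks the
   AVX512-FP16 form (vcvtph2psx), which takes an __m256h, as opposed to
   the pre-existing _mm512_cvtph_ps, which takes raw half-precision bit
   patterns in an __m256i.

     __m256h h = _mm256_set1_ph ((_Float16) 2.0f);
     __m512  f = _mm512_cvtxph_ps (h);           // sixteen floats == 2.0f
  */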
   4752  1.1  mrg 
   4753  1.1  mrg /* Intrinsics vcvtps2phx.  */
   4754  1.1  mrg extern __inline __m256h
   4755  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4756  1.1  mrg _mm512_cvtxps_ph (__m512 __A)
   4757  1.1  mrg {
   4758  1.1  mrg   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
   4759  1.1  mrg 						  _mm256_setzero_ph (),
   4760  1.1  mrg 						  (__mmask16) -1,
   4761  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   4762  1.1  mrg }
   4763  1.1  mrg 
   4764  1.1  mrg extern __inline __m256h
   4765  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4766  1.1  mrg _mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
   4767  1.1  mrg {
   4768  1.1  mrg   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
   4769  1.1  mrg 						  __A, __B,
   4770  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   4771  1.1  mrg }
   4772  1.1  mrg 
   4773  1.1  mrg extern __inline __m256h
   4774  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4775  1.1  mrg _mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
   4776  1.1  mrg {
   4777  1.1  mrg   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
   4778  1.1  mrg 						  _mm256_setzero_ph (),
   4779  1.1  mrg 						  __A,
   4780  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   4781  1.1  mrg }
   4782  1.1  mrg 
   4783  1.1  mrg #ifdef __OPTIMIZE__
   4784  1.1  mrg extern __inline __m256h
   4785  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4786  1.1  mrg _mm512_cvtx_roundps_ph (__m512 __A, int __B)
   4787  1.1  mrg {
   4788  1.1  mrg   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
   4789  1.1  mrg 						  _mm256_setzero_ph (),
   4790  1.1  mrg 						  (__mmask16) -1,
   4791  1.1  mrg 						  __B);
   4792  1.1  mrg }
   4793  1.1  mrg 
   4794  1.1  mrg extern __inline __m256h
   4795  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4796  1.1  mrg _mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
   4797  1.1  mrg {
   4798  1.1  mrg   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
   4799  1.1  mrg 						  __A, __B, __D);
   4800  1.1  mrg }
   4801  1.1  mrg 
   4802  1.1  mrg extern __inline __m256h
   4803  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4804  1.1  mrg _mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
   4805  1.1  mrg {
   4806  1.1  mrg   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
   4807  1.1  mrg 						  _mm256_setzero_ph (),
   4808  1.1  mrg 						  __A, __C);
   4809  1.1  mrg }
   4810  1.1  mrg 
   4811  1.1  mrg #else
   4812  1.1  mrg #define _mm512_cvtx_roundps_ph(A, B)				\
   4813  1.1  mrg   (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A),	\
   4814  1.1  mrg 					    _mm256_setzero_ph (),\
   4815  1.1  mrg 					    (__mmask16)-1, (B)))
   4816  1.1  mrg 
   4817  1.1  mrg #define _mm512_mask_cvtx_roundps_ph(A, B, C, D)			\
   4818  1.1  mrg   (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C),	\
   4819  1.1  mrg 					    (A), (B), (D)))
   4820  1.1  mrg 
   4821  1.1  mrg #define _mm512_maskz_cvtx_roundps_ph(A, B, C)			\
   4822  1.1  mrg   (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B),	\
   4823  1.1  mrg 					    _mm256_setzero_ph (),\
   4824  1.1  mrg 					    (A), (C)))
   4825  1.1  mrg #endif /* __OPTIMIZE__ */
   4826  1.1  mrg 
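/* Usage sketch for the conversions above (illustrative only, not part of
   this header); assumes the translation unit targets AVX512FP16, e.g. is
   compiled with -mavx512fp16:

     __m512  f32  = _mm512_set1_ps (1.5f);
     __m256h f16  = _mm512_cvtxps_ph (f32);
     __m256h part = _mm512_mask_cvtxps_ph (f16, (__mmask16) 0x00ff, f32);
*/
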
   4827  1.1  mrg /* Intrinsics vcvtpd2ph.  */
   4828  1.1  mrg extern __inline __m128h
   4829  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4830  1.1  mrg _mm512_cvtpd_ph (__m512d __A)
   4831  1.1  mrg {
   4832  1.1  mrg   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
   4833  1.1  mrg 						 _mm_setzero_ph (),
   4834  1.1  mrg 						 (__mmask8) -1,
   4835  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4836  1.1  mrg }
   4837  1.1  mrg 
   4838  1.1  mrg extern __inline __m128h
   4839  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4840  1.1  mrg _mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
   4841  1.1  mrg {
   4842  1.1  mrg   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
   4843  1.1  mrg 						 __A, __B,
   4844  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4845  1.1  mrg }
   4846  1.1  mrg 
   4847  1.1  mrg extern __inline __m128h
   4848  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4849  1.1  mrg _mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
   4850  1.1  mrg {
   4851  1.1  mrg   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
   4852  1.1  mrg 						 _mm_setzero_ph (),
   4853  1.1  mrg 						 __A,
   4854  1.1  mrg 						 _MM_FROUND_CUR_DIRECTION);
   4855  1.1  mrg }
   4856  1.1  mrg 
   4857  1.1  mrg #ifdef __OPTIMIZE__
   4858  1.1  mrg extern __inline __m128h
   4859  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4860  1.1  mrg _mm512_cvt_roundpd_ph (__m512d __A, int __B)
   4861  1.1  mrg {
   4862  1.1  mrg   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
   4863  1.1  mrg 						 _mm_setzero_ph (),
   4864  1.1  mrg 						 (__mmask8) -1,
   4865  1.1  mrg 						 __B);
   4866  1.1  mrg }
   4867  1.1  mrg 
   4868  1.1  mrg extern __inline __m128h
   4869  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4870  1.1  mrg _mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
   4871  1.1  mrg {
   4872  1.1  mrg   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
   4873  1.1  mrg 						 __A, __B, __D);
   4874  1.1  mrg }
   4875  1.1  mrg 
   4876  1.1  mrg extern __inline __m128h
   4877  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4878  1.1  mrg _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
   4879  1.1  mrg {
   4880  1.1  mrg   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
   4881  1.1  mrg 						 _mm_setzero_ph (),
   4882  1.1  mrg 						 __A, __C);
   4883  1.1  mrg }
   4884  1.1  mrg 
   4885  1.1  mrg #else
   4886  1.1  mrg #define _mm512_cvt_roundpd_ph(A, B)				\
   4887  1.1  mrg   (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A),		\
   4888  1.1  mrg 					   _mm_setzero_ph (),	\
   4889  1.1  mrg 					   (__mmask8)-1, (B)))
   4890  1.1  mrg 
   4891  1.1  mrg #define _mm512_mask_cvt_roundpd_ph(A, B, C, D)			\
   4892  1.1  mrg   (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C),		\
   4893  1.1  mrg 					   (A), (B), (D)))
   4894  1.1  mrg 
   4895  1.1  mrg #define _mm512_maskz_cvt_roundpd_ph(A, B, C)			\
   4896  1.1  mrg   (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B),		\
   4897  1.1  mrg 					   _mm_setzero_ph (),	\
   4898  1.1  mrg 					   (A), (C)))
   4899  1.1  mrg 
   4900  1.1  mrg #endif /* __OPTIMIZE__ */
   4901  1.1  mrg 
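/* Usage sketch for _mm512_cvtpd_ph and its masked forms above (illustrative
   only): eight doubles narrow to eight _Float16 values, and the _maskz form
   zeroes the lanes whose mask bit is clear.

     __m512d d  = _mm512_set1_pd (0.25);
     __m128h h  = _mm512_cvtpd_ph (d);
     __m128h hz = _mm512_maskz_cvtpd_ph ((__mmask8) 0x0f, d);
*/
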
   4902  1.1  mrg /* Intrinsics vcvtsh2ss, vcvtsh2sd.  */
   4903  1.1  mrg extern __inline __m128
   4904  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4905  1.1  mrg _mm_cvtsh_ss (__m128 __A, __m128h __B)
   4906  1.1  mrg {
   4907  1.1  mrg   return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
   4908  1.1  mrg 					      _mm_setzero_ps (),
   4909  1.1  mrg 					      (__mmask8) -1,
   4910  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   4911  1.1  mrg }
   4912  1.1  mrg 
   4913  1.1  mrg extern __inline __m128
   4914  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4915  1.1  mrg _mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
   4916  1.1  mrg 			 __m128h __D)
   4917  1.1  mrg {
   4918  1.1  mrg   return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
   4919  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   4920  1.1  mrg }
   4921  1.1  mrg 
   4922  1.1  mrg extern __inline __m128
   4923  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4924  1.1  mrg _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
   4925  1.1  mrg 			  __m128h __C)
   4926  1.1  mrg {
   4927  1.1  mrg   return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
   4928  1.1  mrg 					      _mm_setzero_ps (),
   4929  1.1  mrg 					      __A, _MM_FROUND_CUR_DIRECTION);
   4930  1.1  mrg }
   4931  1.1  mrg 
   4932  1.1  mrg extern __inline __m128d
   4933  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4934  1.1  mrg _mm_cvtsh_sd (__m128d __A, __m128h __B)
   4935  1.1  mrg {
   4936  1.1  mrg   return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
   4937  1.1  mrg 					      _mm_setzero_pd (),
   4938  1.1  mrg 					      (__mmask8) -1,
   4939  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   4940  1.1  mrg }
   4941  1.1  mrg 
   4942  1.1  mrg extern __inline __m128d
   4943  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4944  1.1  mrg _mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
   4945  1.1  mrg 			 __m128h __D)
   4946  1.1  mrg {
   4947  1.1  mrg   return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
   4948  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   4949  1.1  mrg }
   4950  1.1  mrg 
   4951  1.1  mrg extern __inline __m128d
   4952  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4953  1.1  mrg _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
   4954  1.1  mrg {
   4955  1.1  mrg   return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
   4956  1.1  mrg 					      _mm_setzero_pd (),
   4957  1.1  mrg 					      __A, _MM_FROUND_CUR_DIRECTION);
   4958  1.1  mrg }
   4959  1.1  mrg 
   4960  1.1  mrg #ifdef __OPTIMIZE__
   4961  1.1  mrg extern __inline __m128
   4962  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4963  1.1  mrg _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
   4964  1.1  mrg {
   4965  1.1  mrg   return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
   4966  1.1  mrg 					      _mm_setzero_ps (),
   4967  1.1  mrg 					      (__mmask8) -1, __R);
   4968  1.1  mrg }
   4969  1.1  mrg 
   4970  1.1  mrg extern __inline __m128
   4971  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4972  1.1  mrg _mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
   4973  1.1  mrg 			 __m128h __D, const int __R)
   4974  1.1  mrg {
   4975  1.1  mrg   return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
   4976  1.1  mrg }
   4977  1.1  mrg 
   4978  1.1  mrg extern __inline __m128
   4979  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4980  1.1  mrg _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
   4981  1.1  mrg 			  __m128h __C, const int __R)
   4982  1.1  mrg {
   4983  1.1  mrg   return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
   4984  1.1  mrg 					      _mm_setzero_ps (),
   4985  1.1  mrg 					      __A, __R);
   4986  1.1  mrg }
   4987  1.1  mrg 
   4988  1.1  mrg extern __inline __m128d
   4989  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4990  1.1  mrg _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
   4991  1.1  mrg {
   4992  1.1  mrg   return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
   4993  1.1  mrg 					      _mm_setzero_pd (),
   4994  1.1  mrg 					      (__mmask8) -1, __R);
   4995  1.1  mrg }
   4996  1.1  mrg 
   4997  1.1  mrg extern __inline __m128d
   4998  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   4999  1.1  mrg _mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
   5000  1.1  mrg 			 __m128h __D, const int __R)
   5001  1.1  mrg {
   5002  1.1  mrg   return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
   5003  1.1  mrg }
   5004  1.1  mrg 
   5005  1.1  mrg extern __inline __m128d
   5006  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5007  1.1  mrg _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
   5008  1.1  mrg {
   5009  1.1  mrg   return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
   5010  1.1  mrg 					      _mm_setzero_pd (),
   5011  1.1  mrg 					      __A, __R);
   5012  1.1  mrg }
   5013  1.1  mrg 
   5014  1.1  mrg #else
   5015  1.1  mrg #define _mm_cvt_roundsh_ss(A, B, R)				\
   5016  1.1  mrg   (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A),		\
   5017  1.1  mrg 					_mm_setzero_ps (),	\
   5018  1.1  mrg 					(__mmask8) -1, (R)))
   5019  1.1  mrg 
   5020  1.1  mrg #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R)				\
   5021  1.1  mrg   (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
   5022  1.1  mrg 
   5023  1.1  mrg #define _mm_maskz_cvt_roundsh_ss(A, B, C, R)			\
   5024  1.1  mrg   (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B),		\
   5025  1.1  mrg 					_mm_setzero_ps (),	\
   5026  1.1  mrg 					(A), (R)))
   5027  1.1  mrg 
   5028  1.1  mrg #define _mm_cvt_roundsh_sd(A, B, R)				\
   5029  1.1  mrg   (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A),		\
   5030  1.1  mrg 					_mm_setzero_pd (),	\
   5031  1.1  mrg 					(__mmask8) -1, (R)))
   5032  1.1  mrg 
   5033  1.1  mrg #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R)				\
   5034  1.1  mrg   (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
   5035  1.1  mrg 
   5036  1.1  mrg #define _mm_maskz_cvt_roundsh_sd(A, B, C, R)			\
   5037  1.1  mrg   (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B),		\
   5038  1.1  mrg 					_mm_setzero_pd (),	\
   5039  1.1  mrg 					(A), (R)))
   5040  1.1  mrg 
   5041  1.1  mrg #endif /* __OPTIMIZE__ */
   5042  1.1  mrg 
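/* Usage sketch for the scalar conversions above (illustrative only): only
   the lowest lane is converted; the upper lanes of the result are copied
   from the first source operand.  _mm_set_sh is defined earlier in this
   header.

     __m128h h = _mm_set_sh ((_Float16) 2.0f);
     __m128  s = _mm_cvtsh_ss (_mm_setzero_ps (), h);
     __m128d d = _mm_cvtsh_sd (_mm_setzero_pd (), h);
*/
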
   5043  1.1  mrg /* Intrinsics vcvtss2sh, vcvtsd2sh.  */
   5044  1.1  mrg extern __inline __m128h
   5045  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5046  1.1  mrg _mm_cvtss_sh (__m128h __A, __m128 __B)
   5047  1.1  mrg {
   5048  1.1  mrg   return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
   5049  1.1  mrg 					      _mm_setzero_ph (),
   5050  1.1  mrg 					      (__mmask8) -1,
   5051  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   5052  1.1  mrg }
   5053  1.1  mrg 
   5054  1.1  mrg extern __inline __m128h
   5055  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5056  1.1  mrg _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
   5057  1.1  mrg {
   5058  1.1  mrg   return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
   5059  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   5060  1.1  mrg }
   5061  1.1  mrg 
   5062  1.1  mrg extern __inline __m128h
   5063  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5064  1.1  mrg _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
   5065  1.1  mrg {
   5066  1.1  mrg   return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
   5067  1.1  mrg 					      _mm_setzero_ph (),
   5068  1.1  mrg 					      __A, _MM_FROUND_CUR_DIRECTION);
   5069  1.1  mrg }
   5070  1.1  mrg 
   5071  1.1  mrg extern __inline __m128h
   5072  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5073  1.1  mrg _mm_cvtsd_sh (__m128h __A, __m128d __B)
   5074  1.1  mrg {
   5075  1.1  mrg   return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
   5076  1.1  mrg 					      _mm_setzero_ph (),
   5077  1.1  mrg 					      (__mmask8) -1,
   5078  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   5079  1.1  mrg }
   5080  1.1  mrg 
   5081  1.1  mrg extern __inline __m128h
   5082  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5083  1.1  mrg _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
   5084  1.1  mrg {
   5085  1.1  mrg   return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
   5086  1.1  mrg 					      _MM_FROUND_CUR_DIRECTION);
   5087  1.1  mrg }
   5088  1.1  mrg 
   5089  1.1  mrg extern __inline __m128h
   5090  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5091  1.1  mrg _mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
   5092  1.1  mrg {
   5093  1.1  mrg   return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
   5094  1.1  mrg 					      _mm_setzero_ph (),
   5095  1.1  mrg 					      __A, _MM_FROUND_CUR_DIRECTION);
   5096  1.1  mrg }
   5097  1.1  mrg 
   5098  1.1  mrg #ifdef __OPTIMIZE__
   5099  1.1  mrg extern __inline __m128h
   5100  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5101  1.1  mrg _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
   5102  1.1  mrg {
   5103  1.1  mrg   return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
   5104  1.1  mrg 					      _mm_setzero_ph (),
   5105  1.1  mrg 					      (__mmask8) -1, __R);
   5106  1.1  mrg }
   5107  1.1  mrg 
   5108  1.1  mrg extern __inline __m128h
   5109  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5110  1.1  mrg _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
   5111  1.1  mrg 			 const int __R)
   5112  1.1  mrg {
   5113  1.1  mrg   return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
   5114  1.1  mrg }
   5115  1.1  mrg 
   5116  1.1  mrg extern __inline __m128h
   5117  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5118  1.1  mrg _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
   5119  1.1  mrg 			  const int __R)
   5120  1.1  mrg {
   5121  1.1  mrg   return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
   5122  1.1  mrg 					      _mm_setzero_ph (),
   5123  1.1  mrg 					      __A, __R);
   5124  1.1  mrg }
   5125  1.1  mrg 
   5126  1.1  mrg extern __inline __m128h
   5127  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5128  1.1  mrg _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
   5129  1.1  mrg {
   5130  1.1  mrg   return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
   5131  1.1  mrg 					      _mm_setzero_ph (),
   5132  1.1  mrg 					      (__mmask8) -1, __R);
   5133  1.1  mrg }
   5134  1.1  mrg 
   5135  1.1  mrg extern __inline __m128h
   5136  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5137  1.1  mrg _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
   5138  1.1  mrg 			 const int __R)
   5139  1.1  mrg {
   5140  1.1  mrg   return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
   5141  1.1  mrg }
   5142  1.1  mrg 
   5143  1.1  mrg extern __inline __m128h
   5144  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5145  1.1  mrg _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
   5146  1.1  mrg 			  const int __R)
   5147  1.1  mrg {
   5148  1.1  mrg   return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
   5149  1.1  mrg 					      _mm_setzero_ph (),
   5150  1.1  mrg 					      __A, __R);
   5151  1.1  mrg }
   5152  1.1  mrg 
   5153  1.1  mrg #else
   5154  1.1  mrg #define _mm_cvt_roundss_sh(A, B, R)				\
   5155  1.1  mrg   (__builtin_ia32_vcvtss2sh_mask_round ((B), (A),		\
   5156  1.1  mrg 					_mm_setzero_ph (),	\
   5157  1.1  mrg 					(__mmask8) -1, (R)))
   5158  1.1  mrg 
   5159  1.1  mrg #define _mm_mask_cvt_roundss_sh(A, B, C, D, R)				\
   5160  1.1  mrg   (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
   5161  1.1  mrg 
   5162  1.1  mrg #define _mm_maskz_cvt_roundss_sh(A, B, C, R)			\
   5163  1.1  mrg   (__builtin_ia32_vcvtss2sh_mask_round ((C), (B),		\
   5164  1.1  mrg 					_mm_setzero_ph (),	\
   5165  1.1  mrg 					(A), (R)))
   5166  1.1  mrg 
   5167  1.1  mrg #define _mm_cvt_roundsd_sh(A, B, R)				\
   5168  1.1  mrg   (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A),		\
   5169  1.1  mrg 					_mm_setzero_ph (),	\
   5170  1.1  mrg 					(__mmask8) -1, (R)))
   5171  1.1  mrg 
   5172  1.1  mrg #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R)				\
   5173  1.1  mrg   (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
   5174  1.1  mrg 
   5175  1.1  mrg #define _mm_maskz_cvt_roundsd_sh(A, B, C, R)			\
   5176  1.1  mrg   (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B),		\
   5177  1.1  mrg 					_mm_setzero_ph (),	\
   5178  1.1  mrg 					(A), (R)))
   5179  1.1  mrg 
   5180  1.1  mrg #endif /* __OPTIMIZE__ */
   5181  1.1  mrg 
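/* Usage sketch for _mm_cvtss_sh/_mm_cvtsd_sh above (illustrative only); the
   _round_ variants additionally take an explicit rounding mode, which must
   be a compile-time constant:

     __m128  s  = _mm_set_ss (3.0f);
     __m128h h  = _mm_cvtss_sh (_mm_setzero_ph (), s);
     __m128h hn = _mm_cvt_roundss_sh (_mm_setzero_ph (), s,
				      _MM_FROUND_TO_NEAREST_INT
				      | _MM_FROUND_NO_EXC);
*/
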
   5182  1.1  mrg /* Intrinsics vfmaddsub[132,213,231]ph.  */
   5183  1.1  mrg extern __inline __m512h
   5184  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5185  1.1  mrg _mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
   5186  1.1  mrg {
   5187  1.1  mrg   return (__m512h)
   5188  1.1  mrg     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
   5189  1.1  mrg 					(__v32hf) __B,
   5190  1.1  mrg 					(__v32hf) __C,
   5191  1.1  mrg 					(__mmask32) -1,
   5192  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   5193  1.1  mrg }
   5194  1.1  mrg 
   5195  1.1  mrg extern __inline __m512h
   5196  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5197  1.1  mrg _mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
   5198  1.1  mrg {
   5199  1.1  mrg   return (__m512h)
   5200  1.1  mrg     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
   5201  1.1  mrg 					(__v32hf) __B,
   5202  1.1  mrg 					(__v32hf) __C,
   5203  1.1  mrg 					(__mmask32) __U,
   5204  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   5205  1.1  mrg }
   5206  1.1  mrg 
   5207  1.1  mrg extern __inline __m512h
   5208  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5209  1.1  mrg _mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
   5210  1.1  mrg {
   5211  1.1  mrg   return (__m512h)
   5212  1.1  mrg     __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
   5213  1.1  mrg 					 (__v32hf) __B,
   5214  1.1  mrg 					 (__v32hf) __C,
   5215  1.1  mrg 					 (__mmask32) __U,
   5216  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION);
   5217  1.1  mrg }
   5218  1.1  mrg 
   5219  1.1  mrg extern __inline __m512h
   5220  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5221  1.1  mrg _mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
   5222  1.1  mrg {
   5223  1.1  mrg   return (__m512h)
   5224  1.1  mrg     __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
   5225  1.1  mrg 					 (__v32hf) __B,
   5226  1.1  mrg 					 (__v32hf) __C,
   5227  1.1  mrg 					 (__mmask32) __U,
   5228  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION);
   5229  1.1  mrg }
   5230  1.1  mrg 
   5231  1.1  mrg #ifdef __OPTIMIZE__
   5232  1.1  mrg extern __inline __m512h
   5233  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5234  1.1  mrg _mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
   5235  1.1  mrg {
   5236  1.1  mrg   return (__m512h)
   5237  1.1  mrg     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
   5238  1.1  mrg 					(__v32hf) __B,
   5239  1.1  mrg 					(__v32hf) __C,
   5240  1.1  mrg 					(__mmask32) -1, __R);
   5241  1.1  mrg }
   5242  1.1  mrg 
   5243  1.1  mrg extern __inline __m512h
   5244  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5245  1.1  mrg _mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
   5246  1.1  mrg 			       __m512h __C, const int __R)
   5247  1.1  mrg {
   5248  1.1  mrg   return (__m512h)
   5249  1.1  mrg     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
   5250  1.1  mrg 					(__v32hf) __B,
   5251  1.1  mrg 					(__v32hf) __C,
   5252  1.1  mrg 					(__mmask32) __U, __R);
   5253  1.1  mrg }
   5254  1.1  mrg 
   5255  1.1  mrg extern __inline __m512h
   5256  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5257  1.1  mrg _mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
   5258  1.1  mrg 				__mmask32 __U, const int __R)
   5259  1.1  mrg {
   5260  1.1  mrg   return (__m512h)
   5261  1.1  mrg     __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
   5262  1.1  mrg 					 (__v32hf) __B,
   5263  1.1  mrg 					 (__v32hf) __C,
   5264  1.1  mrg 					 (__mmask32) __U, __R);
   5265  1.1  mrg }
   5266  1.1  mrg 
   5267  1.1  mrg extern __inline __m512h
   5268  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5269  1.1  mrg _mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
   5270  1.1  mrg 				__m512h __C, const int __R)
   5271  1.1  mrg {
   5272  1.1  mrg   return (__m512h)
   5273  1.1  mrg     __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
   5274  1.1  mrg 					 (__v32hf) __B,
   5275  1.1  mrg 					 (__v32hf) __C,
   5276  1.1  mrg 					 (__mmask32) __U, __R);
   5277  1.1  mrg }
   5278  1.1  mrg 
   5279  1.1  mrg #else
   5280  1.1  mrg #define _mm512_fmaddsub_round_ph(A, B, C, R)				\
   5281  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
   5282  1.1  mrg 
   5283  1.1  mrg #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R)			\
   5284  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
   5285  1.1  mrg 
   5286  1.1  mrg #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R)			\
   5287  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
   5288  1.1  mrg 
   5289  1.1  mrg #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R)			\
   5290  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
   5291  1.1  mrg 
   5292  1.1  mrg #endif /* __OPTIMIZE__ */
   5293  1.1  mrg 
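/* Semantics sketch for _mm512_fmaddsub_ph above (illustrative only): each
   pair of lanes alternates subtract/add, i.e. for every i

     r[2 * i]     = __A[2 * i]     * __B[2 * i]     - __C[2 * i];
     r[2 * i + 1] = __A[2 * i + 1] * __B[2 * i + 1] + __C[2 * i + 1];
*/
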
   5294  1.1  mrg /* Intrinsics vfmsubadd[132,213,231]ph.  */
   5295  1.1  mrg extern __inline __m512h
   5296  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5297  1.1  mrg _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
   5298  1.1  mrg {
   5299  1.1  mrg   return (__m512h)
   5300  1.1  mrg     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
   5301  1.1  mrg 					(__v32hf) __B,
   5302  1.1  mrg 					(__v32hf) __C,
   5303  1.1  mrg 					(__mmask32) -1,
   5304  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   5305  1.1  mrg }
   5306  1.1  mrg 
   5307  1.1  mrg extern __inline __m512h
   5308  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5309  1.1  mrg _mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
   5310  1.1  mrg 			 __m512h __B, __m512h __C)
   5311  1.1  mrg {
   5312  1.1  mrg   return (__m512h)
   5313  1.1  mrg     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
   5314  1.1  mrg 					(__v32hf) __B,
   5315  1.1  mrg 					(__v32hf) __C,
   5316  1.1  mrg 					(__mmask32) __U,
   5317  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   5318  1.1  mrg }
   5319  1.1  mrg 
   5320  1.1  mrg extern __inline __m512h
   5321  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5322  1.1  mrg _mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
   5323  1.1  mrg 			  __m512h __C, __mmask32 __U)
   5324  1.1  mrg {
   5325  1.1  mrg   return (__m512h)
   5326  1.1  mrg     __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
   5327  1.1  mrg 					 (__v32hf) __B,
   5328  1.1  mrg 					 (__v32hf) __C,
   5329  1.1  mrg 					 (__mmask32) __U,
   5330  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION);
   5331  1.1  mrg }
   5332  1.1  mrg 
   5333  1.1  mrg extern __inline __m512h
   5334  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5335  1.1  mrg _mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
   5336  1.1  mrg 			  __m512h __B, __m512h __C)
   5337  1.1  mrg {
   5338  1.1  mrg   return (__m512h)
   5339  1.1  mrg     __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
   5340  1.1  mrg 					 (__v32hf) __B,
   5341  1.1  mrg 					 (__v32hf) __C,
   5342  1.1  mrg 					 (__mmask32) __U,
   5343  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION);
   5344  1.1  mrg }
   5345  1.1  mrg 
   5346  1.1  mrg #ifdef __OPTIMIZE__
   5347  1.1  mrg extern __inline __m512h
   5348  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5349  1.1  mrg _mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
   5350  1.1  mrg 			  __m512h __C, const int __R)
   5351  1.1  mrg {
   5352  1.1  mrg   return (__m512h)
   5353  1.1  mrg     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
   5354  1.1  mrg 					(__v32hf) __B,
   5355  1.1  mrg 					(__v32hf) __C,
   5356  1.1  mrg 					(__mmask32) -1, __R);
   5357  1.1  mrg }
   5358  1.1  mrg 
   5359  1.1  mrg extern __inline __m512h
   5360  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5361  1.1  mrg _mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
   5362  1.1  mrg 			       __m512h __C, const int __R)
   5363  1.1  mrg {
   5364  1.1  mrg   return (__m512h)
   5365  1.1  mrg     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
   5366  1.1  mrg 					(__v32hf) __B,
   5367  1.1  mrg 					(__v32hf) __C,
   5368  1.1  mrg 					(__mmask32) __U, __R);
   5369  1.1  mrg }
   5370  1.1  mrg 
   5371  1.1  mrg extern __inline __m512h
   5372  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5373  1.1  mrg _mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
   5374  1.1  mrg 				__mmask32 __U, const int __R)
   5375  1.1  mrg {
   5376  1.1  mrg   return (__m512h)
   5377  1.1  mrg     __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
   5378  1.1  mrg 					 (__v32hf) __B,
   5379  1.1  mrg 					 (__v32hf) __C,
   5380  1.1  mrg 					 (__mmask32) __U, __R);
   5381  1.1  mrg }
   5382  1.1  mrg 
   5383  1.1  mrg extern __inline __m512h
   5384  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5385  1.1  mrg _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
   5386  1.1  mrg 				__m512h __C, const int __R)
   5387  1.1  mrg {
   5388  1.1  mrg   return (__m512h)
   5389  1.1  mrg     __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
   5390  1.1  mrg 					 (__v32hf) __B,
   5391  1.1  mrg 					 (__v32hf) __C,
   5392  1.1  mrg 					 (__mmask32) __U, __R);
   5393  1.1  mrg }
   5394  1.1  mrg 
   5395  1.1  mrg #else
   5396  1.1  mrg #define _mm512_fmsubadd_round_ph(A, B, C, R)				\
   5397  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
   5398  1.1  mrg 
   5399  1.1  mrg #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R)			\
   5400  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
   5401  1.1  mrg 
   5402  1.1  mrg #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R)			\
   5403  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
   5404  1.1  mrg 
   5405  1.1  mrg #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R)			\
   5406  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
   5407  1.1  mrg 
   5408  1.1  mrg #endif /* __OPTIMIZE__ */
   5409  1.1  mrg 
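/* Semantics sketch for _mm512_fmsubadd_ph above (illustrative only): the
   alternation is mirrored relative to fmaddsub, i.e. for every i

     r[2 * i]     = __A[2 * i]     * __B[2 * i]     + __C[2 * i];
     r[2 * i + 1] = __A[2 * i + 1] * __B[2 * i + 1] - __C[2 * i + 1];
*/
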
   5410  1.1  mrg /* Intrinsics vfmadd[132,213,231]ph.  */
   5411  1.1  mrg extern __inline __m512h
   5412  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5413  1.1  mrg _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
   5414  1.1  mrg {
   5415  1.1  mrg   return (__m512h)
   5416  1.1  mrg     __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
   5417  1.1  mrg 				     (__v32hf) __B,
   5418  1.1  mrg 				     (__v32hf) __C,
   5419  1.1  mrg 				     (__mmask32) -1,
   5420  1.1  mrg 				     _MM_FROUND_CUR_DIRECTION);
   5421  1.1  mrg }
   5422  1.1  mrg 
   5423  1.1  mrg extern __inline __m512h
   5424  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5425  1.1  mrg _mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
   5426  1.1  mrg {
   5427  1.1  mrg   return (__m512h)
   5428  1.1  mrg     __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
   5429  1.1  mrg 				     (__v32hf) __B,
   5430  1.1  mrg 				     (__v32hf) __C,
   5431  1.1  mrg 				     (__mmask32) __U,
   5432  1.1  mrg 				     _MM_FROUND_CUR_DIRECTION);
   5433  1.1  mrg }
   5434  1.1  mrg 
   5435  1.1  mrg extern __inline __m512h
   5436  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5437  1.1  mrg _mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
   5438  1.1  mrg {
   5439  1.1  mrg   return (__m512h)
   5440  1.1  mrg     __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
   5441  1.1  mrg 				      (__v32hf) __B,
   5442  1.1  mrg 				      (__v32hf) __C,
   5443  1.1  mrg 				      (__mmask32) __U,
   5444  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5445  1.1  mrg }
   5446  1.1  mrg 
   5447  1.1  mrg extern __inline __m512h
   5448  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5449  1.1  mrg _mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
   5450  1.1  mrg {
   5451  1.1  mrg   return (__m512h)
   5452  1.1  mrg     __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
   5453  1.1  mrg 				      (__v32hf) __B,
   5454  1.1  mrg 				      (__v32hf) __C,
   5455  1.1  mrg 				      (__mmask32) __U,
   5456  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5457  1.1  mrg }
   5458  1.1  mrg 
   5459  1.1  mrg #ifdef __OPTIMIZE__
   5460  1.1  mrg extern __inline __m512h
   5461  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5462  1.1  mrg _mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
   5463  1.1  mrg {
   5464  1.1  mrg   return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
   5465  1.1  mrg 						       (__v32hf) __B,
   5466  1.1  mrg 						       (__v32hf) __C,
   5467  1.1  mrg 						       (__mmask32) -1, __R);
   5468  1.1  mrg }
   5469  1.1  mrg 
   5470  1.1  mrg extern __inline __m512h
   5471  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5472  1.1  mrg _mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
   5473  1.1  mrg 			       __m512h __C, const int __R)
   5474  1.1  mrg {
   5475  1.1  mrg   return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
   5476  1.1  mrg 						       (__v32hf) __B,
   5477  1.1  mrg 						       (__v32hf) __C,
   5478  1.1  mrg 						       (__mmask32) __U, __R);
   5479  1.1  mrg }
   5480  1.1  mrg 
   5481  1.1  mrg extern __inline __m512h
   5482  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5483  1.1  mrg _mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
   5484  1.1  mrg 				__mmask32 __U, const int __R)
   5485  1.1  mrg {
   5486  1.1  mrg   return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
   5487  1.1  mrg 							(__v32hf) __B,
   5488  1.1  mrg 							(__v32hf) __C,
   5489  1.1  mrg 							(__mmask32) __U, __R);
   5490  1.1  mrg }
   5491  1.1  mrg 
   5492  1.1  mrg extern __inline __m512h
   5493  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5494  1.1  mrg _mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
   5495  1.1  mrg 				__m512h __C, const int __R)
   5496  1.1  mrg {
   5497  1.1  mrg   return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
   5498  1.1  mrg 							(__v32hf) __B,
   5499  1.1  mrg 							(__v32hf) __C,
   5500  1.1  mrg 							(__mmask32) __U, __R);
   5501  1.1  mrg }
   5502  1.1  mrg 
   5503  1.1  mrg #else
   5504  1.1  mrg #define _mm512_fmadd_round_ph(A, B, C, R)				\
   5505  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
   5506  1.1  mrg 
   5507  1.1  mrg #define _mm512_mask_fmadd_round_ph(A, U, B, C, R)			\
   5508  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
   5509  1.1  mrg 
   5510  1.1  mrg #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R)			\
   5511  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
   5512  1.1  mrg 
   5513  1.1  mrg #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R)			\
   5514  1.1  mrg   ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
   5515  1.1  mrg 
   5516  1.1  mrg #endif /* __OPTIMIZE__ */
   5517  1.1  mrg 
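/* Usage sketch for the masked forms above (illustrative only); given
   __m512h a, b, c and __mmask32 m, the lanes whose mask bit is clear keep
   the first vector operand (_mask), keep __C (_mask3), or become zero
   (_maskz):

     __m512h r_full  = _mm512_fmadd_ph (a, b, c);
     __m512h r_merge = _mm512_mask_fmadd_ph (a, m, b, c);
     __m512h r_zero  = _mm512_maskz_fmadd_ph (m, a, b, c);
*/
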
   5518  1.1  mrg /* Intrinsics vfnmadd[132,213,231]ph.  */
   5519  1.1  mrg extern __inline __m512h
   5520  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5521  1.1  mrg _mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
   5522  1.1  mrg {
   5523  1.1  mrg   return (__m512h)
   5524  1.1  mrg     __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
   5525  1.1  mrg 				      (__v32hf) __B,
   5526  1.1  mrg 				      (__v32hf) __C,
   5527  1.1  mrg 				      (__mmask32) -1,
   5528  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5529  1.1  mrg }
   5530  1.1  mrg 
   5531  1.1  mrg extern __inline __m512h
   5532  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5533  1.1  mrg _mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
   5534  1.1  mrg {
   5535  1.1  mrg   return (__m512h)
   5536  1.1  mrg     __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
   5537  1.1  mrg 				      (__v32hf) __B,
   5538  1.1  mrg 				      (__v32hf) __C,
   5539  1.1  mrg 				      (__mmask32) __U,
   5540  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5541  1.1  mrg }
   5542  1.1  mrg 
   5543  1.1  mrg extern __inline __m512h
   5544  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5545  1.1  mrg _mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
   5546  1.1  mrg {
   5547  1.1  mrg   return (__m512h)
   5548  1.1  mrg     __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
   5549  1.1  mrg 				       (__v32hf) __B,
   5550  1.1  mrg 				       (__v32hf) __C,
   5551  1.1  mrg 				       (__mmask32) __U,
   5552  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION);
   5553  1.1  mrg }
   5554  1.1  mrg 
   5555  1.1  mrg extern __inline __m512h
   5556  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5557  1.1  mrg _mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
   5558  1.1  mrg {
   5559  1.1  mrg   return (__m512h)
   5560  1.1  mrg     __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
   5561  1.1  mrg 				       (__v32hf) __B,
   5562  1.1  mrg 				       (__v32hf) __C,
   5563  1.1  mrg 				       (__mmask32) __U,
   5564  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION);
   5565  1.1  mrg }
   5566  1.1  mrg 
   5567  1.1  mrg #ifdef __OPTIMIZE__
   5568  1.1  mrg extern __inline __m512h
   5569  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5570  1.1  mrg _mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
   5571  1.1  mrg {
   5572  1.1  mrg   return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
   5573  1.1  mrg 						       (__v32hf) __B,
   5574  1.1  mrg 						       (__v32hf) __C,
   5575  1.1  mrg 						       (__mmask32) -1, __R);
   5576  1.1  mrg }
   5577  1.1  mrg 
   5578  1.1  mrg extern __inline __m512h
   5579  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5580  1.1  mrg _mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
   5581  1.1  mrg 			       __m512h __C, const int __R)
   5582  1.1  mrg {
   5583  1.1  mrg   return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
   5584  1.1  mrg 						       (__v32hf) __B,
   5585  1.1  mrg 						       (__v32hf) __C,
   5586  1.1  mrg 						       (__mmask32) __U, __R);
   5587  1.1  mrg }
   5588  1.1  mrg 
   5589  1.1  mrg extern __inline __m512h
   5590  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5591  1.1  mrg _mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
   5592  1.1  mrg 				__mmask32 __U, const int __R)
   5593  1.1  mrg {
   5594  1.1  mrg   return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
   5595  1.1  mrg 							(__v32hf) __B,
   5596  1.1  mrg 							(__v32hf) __C,
   5597  1.1  mrg 							(__mmask32) __U, __R);
   5598  1.1  mrg }
   5599  1.1  mrg 
   5600  1.1  mrg extern __inline __m512h
   5601  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5602  1.1  mrg _mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
   5603  1.1  mrg 				__m512h __C, const int __R)
   5604  1.1  mrg {
   5605  1.1  mrg   return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
   5606  1.1  mrg 							(__v32hf) __B,
   5607  1.1  mrg 							(__v32hf) __C,
   5608  1.1  mrg 							(__mmask32) __U, __R);
   5609  1.1  mrg }
   5610  1.1  mrg 
   5611  1.1  mrg #else
   5612  1.1  mrg #define _mm512_fnmadd_round_ph(A, B, C, R)				\
   5613  1.1  mrg   ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
   5614  1.1  mrg 
   5615  1.1  mrg #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R)			\
   5616  1.1  mrg   ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
   5617  1.1  mrg 
   5618  1.1  mrg #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R)			\
   5619  1.1  mrg   ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
   5620  1.1  mrg 
   5621  1.1  mrg #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R)			\
   5622  1.1  mrg   ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
   5623  1.1  mrg 
   5624  1.1  mrg #endif /* __OPTIMIZE__ */
   5625  1.1  mrg 
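/* Semantics sketch for _mm512_fnmadd_ph above (illustrative only): the
   product is negated before the addition, i.e. per lane

     r[i] = -(__A[i] * __B[i]) + __C[i];
*/
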
   5626  1.1  mrg /* Intrinsics vfmsub[132,213,231]ph.  */
   5627  1.1  mrg extern __inline __m512h
   5628  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5629  1.1  mrg _mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
   5630  1.1  mrg {
   5631  1.1  mrg   return (__m512h)
   5632  1.1  mrg     __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
   5633  1.1  mrg 				     (__v32hf) __B,
   5634  1.1  mrg 				     (__v32hf) __C,
   5635  1.1  mrg 				     (__mmask32) -1,
   5636  1.1  mrg 				     _MM_FROUND_CUR_DIRECTION);
   5637  1.1  mrg }
   5638  1.1  mrg 
   5639  1.1  mrg extern __inline __m512h
   5640  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5641  1.1  mrg _mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
   5642  1.1  mrg {
   5643  1.1  mrg   return (__m512h)
   5644  1.1  mrg     __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
   5645  1.1  mrg 				     (__v32hf) __B,
   5646  1.1  mrg 				     (__v32hf) __C,
   5647  1.1  mrg 				     (__mmask32) __U,
   5648  1.1  mrg 				     _MM_FROUND_CUR_DIRECTION);
   5649  1.1  mrg }
   5650  1.1  mrg 
   5651  1.1  mrg extern __inline __m512h
   5652  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5653  1.1  mrg _mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
   5654  1.1  mrg {
   5655  1.1  mrg   return (__m512h)
   5656  1.1  mrg     __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
   5657  1.1  mrg 				      (__v32hf) __B,
   5658  1.1  mrg 				      (__v32hf) __C,
   5659  1.1  mrg 				      (__mmask32) __U,
   5660  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5661  1.1  mrg }
   5662  1.1  mrg 
   5663  1.1  mrg extern __inline __m512h
   5664  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5665  1.1  mrg _mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
   5666  1.1  mrg {
   5667  1.1  mrg   return (__m512h)
   5668  1.1  mrg     __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
   5669  1.1  mrg 				      (__v32hf) __B,
   5670  1.1  mrg 				      (__v32hf) __C,
   5671  1.1  mrg 				      (__mmask32) __U,
   5672  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5673  1.1  mrg }
   5674  1.1  mrg 
   5675  1.1  mrg #ifdef __OPTIMIZE__
   5676  1.1  mrg extern __inline __m512h
   5677  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5678  1.1  mrg _mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
   5679  1.1  mrg {
   5680  1.1  mrg   return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
   5681  1.1  mrg 						       (__v32hf) __B,
   5682  1.1  mrg 						       (__v32hf) __C,
   5683  1.1  mrg 						       (__mmask32) -1, __R);
   5684  1.1  mrg }
   5685  1.1  mrg 
   5686  1.1  mrg extern __inline __m512h
   5687  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5688  1.1  mrg _mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
   5689  1.1  mrg 			       __m512h __C, const int __R)
   5690  1.1  mrg {
   5691  1.1  mrg   return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
   5692  1.1  mrg 						       (__v32hf) __B,
   5693  1.1  mrg 						       (__v32hf) __C,
   5694  1.1  mrg 						       (__mmask32) __U, __R);
   5695  1.1  mrg }
   5696  1.1  mrg 
   5697  1.1  mrg extern __inline __m512h
   5698  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5699  1.1  mrg _mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
   5700  1.1  mrg 				__mmask32 __U, const int __R)
   5701  1.1  mrg {
   5702  1.1  mrg   return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
   5703  1.1  mrg 							(__v32hf) __B,
   5704  1.1  mrg 							(__v32hf) __C,
   5705  1.1  mrg 							(__mmask32) __U, __R);
   5706  1.1  mrg }
   5707  1.1  mrg 
   5708  1.1  mrg extern __inline __m512h
   5709  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5710  1.1  mrg _mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
   5711  1.1  mrg 				__m512h __C, const int __R)
   5712  1.1  mrg {
   5713  1.1  mrg   return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
   5714  1.1  mrg 							(__v32hf) __B,
   5715  1.1  mrg 							(__v32hf) __C,
   5716  1.1  mrg 							(__mmask32) __U, __R);
   5717  1.1  mrg }
   5718  1.1  mrg 
   5719  1.1  mrg #else
   5720  1.1  mrg #define _mm512_fmsub_round_ph(A, B, C, R)				\
   5721  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
   5722  1.1  mrg 
   5723  1.1  mrg #define _mm512_mask_fmsub_round_ph(A, U, B, C, R)			\
   5724  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
   5725  1.1  mrg 
   5726  1.1  mrg #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R)			\
   5727  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
   5728  1.1  mrg 
   5729  1.1  mrg #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R)			\
   5730  1.1  mrg   ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
   5731  1.1  mrg 
   5732  1.1  mrg #endif /* __OPTIMIZE__ */
   5733  1.1  mrg 
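/* Semantics sketch for _mm512_fmsub_ph above (illustrative only): per lane

     r[i] = __A[i] * __B[i] - __C[i];
*/
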
   5734  1.1  mrg /* Intrinsics vfnmsub[132,213,231]ph.  */
   5735  1.1  mrg extern __inline __m512h
   5736  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5737  1.1  mrg _mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
   5738  1.1  mrg {
   5739  1.1  mrg   return (__m512h)
   5740  1.1  mrg     __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
   5741  1.1  mrg 				      (__v32hf) __B,
   5742  1.1  mrg 				      (__v32hf) __C,
   5743  1.1  mrg 				      (__mmask32) -1,
   5744  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5745  1.1  mrg }
   5746  1.1  mrg 
   5747  1.1  mrg extern __inline __m512h
   5748  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5749  1.1  mrg _mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
   5750  1.1  mrg {
   5751  1.1  mrg   return (__m512h)
   5752  1.1  mrg     __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
   5753  1.1  mrg 				      (__v32hf) __B,
   5754  1.1  mrg 				      (__v32hf) __C,
   5755  1.1  mrg 				      (__mmask32) __U,
   5756  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   5757  1.1  mrg }
   5758  1.1  mrg 
   5759  1.1  mrg extern __inline __m512h
   5760  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5761  1.1  mrg _mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
   5762  1.1  mrg {
   5763  1.1  mrg   return (__m512h)
   5764  1.1  mrg     __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
   5765  1.1  mrg 				       (__v32hf) __B,
   5766  1.1  mrg 				       (__v32hf) __C,
   5767  1.1  mrg 				       (__mmask32) __U,
   5768  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION);
   5769  1.1  mrg }
   5770  1.1  mrg 
   5771  1.1  mrg extern __inline __m512h
   5772  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5773  1.1  mrg _mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
   5774  1.1  mrg {
   5775  1.1  mrg   return (__m512h)
   5776  1.1  mrg     __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
   5777  1.1  mrg 				       (__v32hf) __B,
   5778  1.1  mrg 				       (__v32hf) __C,
   5779  1.1  mrg 				       (__mmask32) __U,
   5780  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION);
   5781  1.1  mrg }
   5782  1.1  mrg 
   5783  1.1  mrg #ifdef __OPTIMIZE__
   5784  1.1  mrg extern __inline __m512h
   5785  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5786  1.1  mrg _mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
   5787  1.1  mrg {
   5788  1.1  mrg   return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
   5789  1.1  mrg 						       (__v32hf) __B,
   5790  1.1  mrg 						       (__v32hf) __C,
   5791  1.1  mrg 						       (__mmask32) -1, __R);
   5792  1.1  mrg }
   5793  1.1  mrg 
   5794  1.1  mrg extern __inline __m512h
   5795  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5796  1.1  mrg _mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
   5797  1.1  mrg 			       __m512h __C, const int __R)
   5798  1.1  mrg {
   5799  1.1  mrg   return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
   5800  1.1  mrg 						       (__v32hf) __B,
   5801  1.1  mrg 						       (__v32hf) __C,
   5802  1.1  mrg 						       (__mmask32) __U, __R);
   5803  1.1  mrg }
   5804  1.1  mrg 
   5805  1.1  mrg extern __inline __m512h
   5806  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5807  1.1  mrg _mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
   5808  1.1  mrg 				__mmask32 __U, const int __R)
   5809  1.1  mrg {
   5810  1.1  mrg   return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
   5811  1.1  mrg 							(__v32hf) __B,
   5812  1.1  mrg 							(__v32hf) __C,
   5813  1.1  mrg 							(__mmask32) __U, __R);
   5814  1.1  mrg }
   5815  1.1  mrg 
   5816  1.1  mrg extern __inline __m512h
   5817  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5818  1.1  mrg _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
   5819  1.1  mrg 				__m512h __C, const int __R)
   5820  1.1  mrg {
   5821  1.1  mrg   return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
   5822  1.1  mrg 							(__v32hf) __B,
   5823  1.1  mrg 							(__v32hf) __C,
   5824  1.1  mrg 							(__mmask32) __U, __R);
   5825  1.1  mrg }
   5826  1.1  mrg 
   5827  1.1  mrg #else
   5828  1.1  mrg #define _mm512_fnmsub_round_ph(A, B, C, R)				\
   5829  1.1  mrg   ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
   5830  1.1  mrg 
   5831  1.1  mrg #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R)			\
   5832  1.1  mrg   ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
   5833  1.1  mrg 
   5834  1.1  mrg #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R)			\
   5835  1.1  mrg   ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
   5836  1.1  mrg 
   5837  1.1  mrg #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R)			\
   5838  1.1  mrg   ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
   5839  1.1  mrg 
   5840  1.1  mrg #endif /* __OPTIMIZE__ */
   5841  1.1  mrg 
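/* Semantics sketch for _mm512_fnmsub_ph above (illustrative only): per lane

     r[i] = -(__A[i] * __B[i]) - __C[i];
*/
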
   5842  1.1  mrg /* Intrinsics vfmadd[132,213,231]sh.  */
   5843  1.1  mrg extern __inline __m128h
   5844  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5845  1.1  mrg _mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
   5846  1.1  mrg {
   5847  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   5848  1.1  mrg 						  (__v8hf) __A,
   5849  1.1  mrg 						  (__v8hf) __B,
   5850  1.1  mrg 						  (__mmask8) -1,
   5851  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   5852  1.1  mrg }
   5853  1.1  mrg 
   5854  1.1  mrg extern __inline __m128h
   5855  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5856  1.1  mrg _mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
   5857  1.1  mrg {
   5858  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   5859  1.1  mrg 						  (__v8hf) __A,
   5860  1.1  mrg 						  (__v8hf) __B,
   5861  1.1  mrg 						  (__mmask8) __U,
   5862  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   5863  1.1  mrg }
   5864  1.1  mrg 
   5865  1.1  mrg extern __inline __m128h
   5866  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5867  1.1  mrg _mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
   5868  1.1  mrg {
   5869  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
   5870  1.1  mrg 						   (__v8hf) __A,
   5871  1.1  mrg 						   (__v8hf) __B,
   5872  1.1  mrg 						   (__mmask8) __U,
   5873  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   5874  1.1  mrg }
   5875  1.1  mrg 
   5876  1.1  mrg extern __inline __m128h
   5877  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5878  1.1  mrg _mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
   5879  1.1  mrg {
   5880  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
   5881  1.1  mrg 						   (__v8hf) __A,
   5882  1.1  mrg 						   (__v8hf) __B,
   5883  1.1  mrg 						   (__mmask8) __U,
   5884  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   5885  1.1  mrg }
   5886  1.1  mrg 
   5887  1.1  mrg 
   5888  1.1  mrg #ifdef __OPTIMIZE__
   5889  1.1  mrg extern __inline __m128h
   5890  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5891  1.1  mrg _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
   5892  1.1  mrg {
   5893  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   5894  1.1  mrg 						  (__v8hf) __A,
   5895  1.1  mrg 						  (__v8hf) __B,
   5896  1.1  mrg 						  (__mmask8) -1,
   5897  1.1  mrg 						  __R);
   5898  1.1  mrg }
   5899  1.1  mrg 
   5900  1.1  mrg extern __inline __m128h
   5901  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5902  1.1  mrg _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
   5903  1.1  mrg 			 const int __R)
   5904  1.1  mrg {
   5905  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   5906  1.1  mrg 						  (__v8hf) __A,
   5907  1.1  mrg 						  (__v8hf) __B,
   5908  1.1  mrg 						  (__mmask8) __U, __R);
   5909  1.1  mrg }
   5910  1.1  mrg 
   5911  1.1  mrg extern __inline __m128h
   5912  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5913  1.1  mrg _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
   5914  1.1  mrg 			  const int __R)
   5915  1.1  mrg {
   5916  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
   5917  1.1  mrg 						   (__v8hf) __A,
   5918  1.1  mrg 						   (__v8hf) __B,
   5919  1.1  mrg 						   (__mmask8) __U, __R);
   5920  1.1  mrg }
   5921  1.1  mrg 
   5922  1.1  mrg extern __inline __m128h
   5923  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5924  1.1  mrg _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
   5925  1.1  mrg 			  __m128h __B, const int __R)
   5926  1.1  mrg {
   5927  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
   5928  1.1  mrg 						   (__v8hf) __A,
   5929  1.1  mrg 						   (__v8hf) __B,
   5930  1.1  mrg 						   (__mmask8) __U, __R);
   5931  1.1  mrg }
   5932  1.1  mrg 
   5933  1.1  mrg #else
   5934  1.1  mrg #define _mm_fmadd_round_sh(A, B, C, R)					\
   5935  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
   5936  1.1  mrg #define _mm_mask_fmadd_round_sh(A, U, B, C, R)				\
   5937  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
   5938  1.1  mrg #define _mm_mask3_fmadd_round_sh(A, B, C, U, R)				\
   5939  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
   5940  1.1  mrg #define _mm_maskz_fmadd_round_sh(U, A, B, C, R)				\
   5941  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
   5942  1.1  mrg 
   5943  1.1  mrg #endif /* __OPTIMIZE__ */
   5944  1.1  mrg 
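/* Usage sketch for _mm_fmadd_sh above (illustrative only): only the lowest
   _Float16 lane computes __W * __A + __B; the remaining lanes of the result
   are copied from __W.  Given __m128h w, a, b:

     __m128h r = _mm_fmadd_sh (w, a, b);
*/
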
   5945  1.1  mrg /* Intrinsics vfnmadd[132,213,231]sh.  */
   5946  1.1  mrg extern __inline __m128h
   5947  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5948  1.1  mrg _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
   5949  1.1  mrg {
   5950  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
   5951  1.1  mrg 						   (__v8hf) __A,
   5952  1.1  mrg 						   (__v8hf) __B,
   5953  1.1  mrg 						   (__mmask8) -1,
   5954  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   5955  1.1  mrg }
   5956  1.1  mrg 
   5957  1.1  mrg extern __inline __m128h
   5958  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5959  1.1  mrg _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
   5960  1.1  mrg {
   5961  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
   5962  1.1  mrg 						  (__v8hf) __A,
   5963  1.1  mrg 						  (__v8hf) __B,
   5964  1.1  mrg 						  (__mmask8) __U,
   5965  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   5966  1.1  mrg }
   5967  1.1  mrg 
   5968  1.1  mrg extern __inline __m128h
   5969  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5970  1.1  mrg _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
   5971  1.1  mrg {
   5972  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
   5973  1.1  mrg 						   (__v8hf) __A,
   5974  1.1  mrg 						   (__v8hf) __B,
   5975  1.1  mrg 						   (__mmask8) __U,
   5976  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   5977  1.1  mrg }
   5978  1.1  mrg 
   5979  1.1  mrg extern __inline __m128h
   5980  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5981  1.1  mrg _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
   5982  1.1  mrg {
   5983  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
   5984  1.1  mrg 						   (__v8hf) __A,
   5985  1.1  mrg 						   (__v8hf) __B,
   5986  1.1  mrg 						   (__mmask8) __U,
   5987  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   5988  1.1  mrg }
   5989  1.1  mrg 
   5990  1.1  mrg 
   5991  1.1  mrg #ifdef __OPTIMIZE__
   5992  1.1  mrg extern __inline __m128h
   5993  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   5994  1.1  mrg _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
   5995  1.1  mrg {
   5996  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
   5997  1.1  mrg 						   (__v8hf) __A,
   5998  1.1  mrg 						   (__v8hf) __B,
   5999  1.1  mrg 						   (__mmask8) -1,
   6000  1.1  mrg 						   __R);
   6001  1.1  mrg }
   6002  1.1  mrg 
   6003  1.1  mrg extern __inline __m128h
   6004  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6005  1.1  mrg _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
   6006  1.1  mrg 			 const int __R)
   6007  1.1  mrg {
   6008  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
   6009  1.1  mrg 						  (__v8hf) __A,
   6010  1.1  mrg 						  (__v8hf) __B,
   6011  1.1  mrg 						  (__mmask8) __U, __R);
   6012  1.1  mrg }
   6013  1.1  mrg 
   6014  1.1  mrg extern __inline __m128h
   6015  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6016  1.1  mrg _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
   6017  1.1  mrg 			  const int __R)
   6018  1.1  mrg {
   6019  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
   6020  1.1  mrg 						   (__v8hf) __A,
   6021  1.1  mrg 						   (__v8hf) __B,
   6022  1.1  mrg 						   (__mmask8) __U, __R);
   6023  1.1  mrg }
   6024  1.1  mrg 
   6025  1.1  mrg extern __inline __m128h
   6026  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6027  1.1  mrg _mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
   6028  1.1  mrg 			  __m128h __B, const int __R)
   6029  1.1  mrg {
   6030  1.1  mrg   return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
   6031  1.1  mrg 						   (__v8hf) __A,
   6032  1.1  mrg 						   (__v8hf) __B,
   6033  1.1  mrg 						   (__mmask8) __U, __R);
   6034  1.1  mrg }
   6035  1.1  mrg 
   6036  1.1  mrg #else
   6037  1.1  mrg #define _mm_fnmadd_round_sh(A, B, C, R)					\
   6038  1.1  mrg   ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
   6039  1.1  mrg #define _mm_mask_fnmadd_round_sh(A, U, B, C, R)				\
   6040  1.1  mrg   ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
   6041  1.1  mrg #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R)			\
   6042  1.1  mrg   ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
   6043  1.1  mrg #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R)			\
   6044  1.1  mrg   ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
   6045  1.1  mrg 
   6046  1.1  mrg #endif /* __OPTIMIZE__ */
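
/* Illustrative sketch of the merge-masking behaviour of the mask_ forms,
   with hypothetical __m128h values __w, __a and __b:

     __m128h __kept    = _mm_mask_fnmadd_sh (__w, (__mmask8) 0, __a, __b);
     __m128h __updated = _mm_mask_fnmadd_sh (__w, (__mmask8) 1, __a, __b);

   __kept leaves the low element of __w unchanged, while __updated has its
   low element set to -(__w[0] * __a[0]) + __b[0].  */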
   6047  1.1  mrg 
   6048  1.1  mrg /* Intrinsics vfmsub[132,213,231]sh.  */
   6049  1.1  mrg extern __inline __m128h
    6050  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6051  1.1  mrg _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
   6052  1.1  mrg {
   6053  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6054  1.1  mrg 						  (__v8hf) __A,
   6055  1.1  mrg 						  -(__v8hf) __B,
   6056  1.1  mrg 						  (__mmask8) -1,
   6057  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   6058  1.1  mrg }
   6059  1.1  mrg 
   6060  1.1  mrg extern __inline __m128h
   6061  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6062  1.1  mrg _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
   6063  1.1  mrg {
   6064  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6065  1.1  mrg 						  (__v8hf) __A,
   6066  1.1  mrg 						  -(__v8hf) __B,
   6067  1.1  mrg 						  (__mmask8) __U,
   6068  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   6069  1.1  mrg }
   6070  1.1  mrg 
   6071  1.1  mrg extern __inline __m128h
   6072  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6073  1.1  mrg _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
   6074  1.1  mrg {
   6075  1.1  mrg   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
   6076  1.1  mrg 						   (__v8hf) __A,
   6077  1.1  mrg 						   (__v8hf) __B,
   6078  1.1  mrg 						   (__mmask8) __U,
   6079  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   6080  1.1  mrg }
   6081  1.1  mrg 
   6082  1.1  mrg extern __inline __m128h
   6083  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6084  1.1  mrg _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
   6085  1.1  mrg {
   6086  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
   6087  1.1  mrg 						   (__v8hf) __A,
   6088  1.1  mrg 						   -(__v8hf) __B,
   6089  1.1  mrg 						   (__mmask8) __U,
   6090  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   6091  1.1  mrg }
   6092  1.1  mrg 
   6093  1.1  mrg 
   6094  1.1  mrg #ifdef __OPTIMIZE__
   6095  1.1  mrg extern __inline __m128h
   6096  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6097  1.1  mrg _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
   6098  1.1  mrg {
   6099  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6100  1.1  mrg 						  (__v8hf) __A,
   6101  1.1  mrg 						  -(__v8hf) __B,
   6102  1.1  mrg 						  (__mmask8) -1,
   6103  1.1  mrg 						  __R);
   6104  1.1  mrg }
   6105  1.1  mrg 
   6106  1.1  mrg extern __inline __m128h
   6107  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6108  1.1  mrg _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
   6109  1.1  mrg 			 const int __R)
   6110  1.1  mrg {
   6111  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6112  1.1  mrg 						  (__v8hf) __A,
   6113  1.1  mrg 						  -(__v8hf) __B,
   6114  1.1  mrg 						  (__mmask8) __U, __R);
   6115  1.1  mrg }
   6116  1.1  mrg 
   6117  1.1  mrg extern __inline __m128h
   6118  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6119  1.1  mrg _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
   6120  1.1  mrg 			  const int __R)
   6121  1.1  mrg {
   6122  1.1  mrg   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
   6123  1.1  mrg 						   (__v8hf) __A,
   6124  1.1  mrg 						   (__v8hf) __B,
   6125  1.1  mrg 						   (__mmask8) __U, __R);
   6126  1.1  mrg }
   6127  1.1  mrg 
   6128  1.1  mrg extern __inline __m128h
   6129  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6130  1.1  mrg _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
   6131  1.1  mrg 			  __m128h __B, const int __R)
   6132  1.1  mrg {
   6133  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
   6134  1.1  mrg 						   (__v8hf) __A,
   6135  1.1  mrg 						   -(__v8hf) __B,
   6136  1.1  mrg 						   (__mmask8) __U, __R);
   6137  1.1  mrg }
   6138  1.1  mrg 
   6139  1.1  mrg #else
   6140  1.1  mrg #define _mm_fmsub_round_sh(A, B, C, R)					\
   6141  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
   6142  1.1  mrg #define _mm_mask_fmsub_round_sh(A, U, B, C, R)				\
   6143  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
   6144  1.1  mrg #define _mm_mask3_fmsub_round_sh(A, B, C, U, R)				\
   6145  1.1  mrg   ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
   6146  1.1  mrg #define _mm_maskz_fmsub_round_sh(U, A, B, C, R)				\
   6147  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
   6148  1.1  mrg 
   6149  1.1  mrg #endif /* __OPTIMIZE__ */
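
/* The fmsub forms above reuse the fmadd builtin with a negated third
   operand (the mask3_ form uses the dedicated vfmsubsh3 builtin instead).
   An illustrative zero-masking sketch, with hypothetical inputs __w, __a
   and __b:

     __m128h __r = _mm_maskz_fmsub_sh ((__mmask8) 0, __w, __a, __b);

   Because the mask bit is clear, the low element of __r is 0.0 rather
   than __w[0] * __a[0] - __b[0].  */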
   6150  1.1  mrg 
   6151  1.1  mrg /* Intrinsics vfnmsub[132,213,231]sh.  */
   6152  1.1  mrg extern __inline __m128h
    6153  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6154  1.1  mrg _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
   6155  1.1  mrg {
   6156  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6157  1.1  mrg 						  -(__v8hf) __A,
   6158  1.1  mrg 						  -(__v8hf) __B,
   6159  1.1  mrg 						  (__mmask8) -1,
   6160  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   6161  1.1  mrg }
   6162  1.1  mrg 
   6163  1.1  mrg extern __inline __m128h
   6164  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6165  1.1  mrg _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
   6166  1.1  mrg {
   6167  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6168  1.1  mrg 						  -(__v8hf) __A,
   6169  1.1  mrg 						  -(__v8hf) __B,
   6170  1.1  mrg 						  (__mmask8) __U,
   6171  1.1  mrg 						  _MM_FROUND_CUR_DIRECTION);
   6172  1.1  mrg }
   6173  1.1  mrg 
   6174  1.1  mrg extern __inline __m128h
   6175  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6176  1.1  mrg _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
   6177  1.1  mrg {
   6178  1.1  mrg   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
   6179  1.1  mrg 						   -(__v8hf) __A,
   6180  1.1  mrg 						   (__v8hf) __B,
   6181  1.1  mrg 						   (__mmask8) __U,
   6182  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   6183  1.1  mrg }
   6184  1.1  mrg 
   6185  1.1  mrg extern __inline __m128h
   6186  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6187  1.1  mrg _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
   6188  1.1  mrg {
   6189  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
   6190  1.1  mrg 						   -(__v8hf) __A,
   6191  1.1  mrg 						   -(__v8hf) __B,
   6192  1.1  mrg 						   (__mmask8) __U,
   6193  1.1  mrg 						   _MM_FROUND_CUR_DIRECTION);
   6194  1.1  mrg }
   6195  1.1  mrg 
   6196  1.1  mrg 
   6197  1.1  mrg #ifdef __OPTIMIZE__
   6198  1.1  mrg extern __inline __m128h
   6199  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6200  1.1  mrg _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
   6201  1.1  mrg {
   6202  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6203  1.1  mrg 						  -(__v8hf) __A,
   6204  1.1  mrg 						  -(__v8hf) __B,
   6205  1.1  mrg 						  (__mmask8) -1,
   6206  1.1  mrg 						  __R);
   6207  1.1  mrg }
   6208  1.1  mrg 
   6209  1.1  mrg extern __inline __m128h
   6210  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6211  1.1  mrg _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
   6212  1.1  mrg 			 const int __R)
   6213  1.1  mrg {
   6214  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
   6215  1.1  mrg 						  -(__v8hf) __A,
   6216  1.1  mrg 						  -(__v8hf) __B,
   6217  1.1  mrg 						  (__mmask8) __U, __R);
   6218  1.1  mrg }
   6219  1.1  mrg 
   6220  1.1  mrg extern __inline __m128h
   6221  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6222  1.1  mrg _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
   6223  1.1  mrg 			  const int __R)
   6224  1.1  mrg {
   6225  1.1  mrg   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
   6226  1.1  mrg 						   -(__v8hf) __A,
   6227  1.1  mrg 						   (__v8hf) __B,
   6228  1.1  mrg 						   (__mmask8) __U, __R);
   6229  1.1  mrg }
   6230  1.1  mrg 
   6231  1.1  mrg extern __inline __m128h
   6232  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6233  1.1  mrg _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
   6234  1.1  mrg 			  __m128h __B, const int __R)
   6235  1.1  mrg {
   6236  1.1  mrg   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
   6237  1.1  mrg 						   -(__v8hf) __A,
   6238  1.1  mrg 						   -(__v8hf) __B,
   6239  1.1  mrg 						   (__mmask8) __U, __R);
   6240  1.1  mrg }
   6241  1.1  mrg 
   6242  1.1  mrg #else
   6243  1.1  mrg #define _mm_fnmsub_round_sh(A, B, C, R)					\
   6244  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
   6245  1.1  mrg #define _mm_mask_fnmsub_round_sh(A, U, B, C, R)				\
   6246  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
   6247  1.1  mrg #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R)			\
   6248  1.1  mrg   ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
   6249  1.1  mrg #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R)			\
   6250  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
   6251  1.1  mrg 
   6252  1.1  mrg #endif /* __OPTIMIZE__ */
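
/* Summary of the four scalar FMA families above, for the low _Float16
   element (the upper elements are copied from the first operand):

     _mm_fmadd_sh  (__W, __A, __B):   __W[0] * __A[0] + __B[0]
     _mm_fmsub_sh  (__W, __A, __B):   __W[0] * __A[0] - __B[0]
     _mm_fnmadd_sh (__W, __A, __B): -(__W[0] * __A[0]) + __B[0]
     _mm_fnmsub_sh (__W, __A, __B): -(__W[0] * __A[0]) - __B[0]  */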
   6253  1.1  mrg 
   6254  1.1  mrg /* Intrinsics vf[,c]maddcph.  */
   6255  1.1  mrg extern __inline __m512h
   6256  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6257  1.1  mrg _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
   6258  1.1  mrg {
   6259  1.1  mrg   return (__m512h)
   6260  1.1  mrg     __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
   6261  1.1  mrg 					(__v32hf) __B,
   6262  1.1  mrg 					(__v32hf) __C,
   6263  1.1  mrg 					_MM_FROUND_CUR_DIRECTION);
   6264  1.1  mrg }
   6265  1.1  mrg 
   6266  1.1  mrg extern __inline __m512h
   6267  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6268  1.1  mrg _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
   6269  1.1  mrg {
   6270  1.1  mrg   return (__m512h)
   6271  1.1  mrg     __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
   6272  1.1  mrg 					     (__v32hf) __C,
   6273  1.1  mrg 					     (__v32hf) __D, __B,
   6274  1.1  mrg 					     _MM_FROUND_CUR_DIRECTION);
   6275  1.1  mrg }
   6276  1.1  mrg 
   6277  1.1  mrg extern __inline __m512h
   6278  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6279  1.1  mrg _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
   6280  1.1  mrg {
   6281  1.1  mrg   return (__m512h)
   6282  1.1  mrg     __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
   6283  1.1  mrg 					      (__v32hf) __B,
   6284  1.1  mrg 					      (__v32hf) __C,
   6285  1.1  mrg 					      __D, _MM_FROUND_CUR_DIRECTION);
   6286  1.1  mrg }
   6287  1.1  mrg 
   6288  1.1  mrg extern __inline __m512h
   6289  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6290  1.1  mrg _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
   6291  1.1  mrg {
   6292  1.1  mrg   return (__m512h)
   6293  1.1  mrg     __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
   6294  1.1  mrg 					      (__v32hf) __C,
   6295  1.1  mrg 					      (__v32hf) __D,
   6296  1.1  mrg 					      __A, _MM_FROUND_CUR_DIRECTION);
   6297  1.1  mrg }
   6298  1.1  mrg 
   6299  1.1  mrg extern __inline __m512h
   6300  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6301  1.1  mrg _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
   6302  1.1  mrg {
   6303  1.1  mrg   return (__m512h)
   6304  1.1  mrg     __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
   6305  1.1  mrg 				       (__v32hf) __B,
   6306  1.1  mrg 				       (__v32hf) __C,
   6307  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION);
   6308  1.1  mrg }
   6309  1.1  mrg 
   6310  1.1  mrg extern __inline __m512h
   6311  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6312  1.1  mrg _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
   6313  1.1  mrg {
   6314  1.1  mrg   return (__m512h)
   6315  1.1  mrg     __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
   6316  1.1  mrg 					    (__v32hf) __C,
   6317  1.1  mrg 					    (__v32hf) __D, __B,
   6318  1.1  mrg 					    _MM_FROUND_CUR_DIRECTION);
   6319  1.1  mrg }
   6320  1.1  mrg 
   6321  1.1  mrg extern __inline __m512h
   6322  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6323  1.1  mrg _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
   6324  1.1  mrg {
   6325  1.1  mrg   return (__m512h)
   6326  1.1  mrg     __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
   6327  1.1  mrg 					     (__v32hf) __B,
   6328  1.1  mrg 					     (__v32hf) __C,
   6329  1.1  mrg 					     __D, _MM_FROUND_CUR_DIRECTION);
   6330  1.1  mrg }
   6331  1.1  mrg 
   6332  1.1  mrg extern __inline __m512h
   6333  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6334  1.1  mrg _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
   6335  1.1  mrg {
   6336  1.1  mrg   return (__m512h)
   6337  1.1  mrg     __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
   6338  1.1  mrg 					     (__v32hf) __C,
   6339  1.1  mrg 					     (__v32hf) __D,
   6340  1.1  mrg 					     __A, _MM_FROUND_CUR_DIRECTION);
   6341  1.1  mrg }
   6342  1.1  mrg 
   6343  1.1  mrg #ifdef __OPTIMIZE__
   6344  1.1  mrg extern __inline __m512h
   6345  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6346  1.1  mrg _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
   6347  1.1  mrg {
   6348  1.1  mrg   return (__m512h)
   6349  1.1  mrg     __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
   6350  1.1  mrg 					(__v32hf) __B,
   6351  1.1  mrg 					(__v32hf) __C,
   6352  1.1  mrg 					__D);
   6353  1.1  mrg }
   6354  1.1  mrg 
   6355  1.1  mrg extern __inline __m512h
   6356  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6357  1.1  mrg _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
   6358  1.1  mrg 			      __m512h __D, const int __E)
   6359  1.1  mrg {
   6360  1.1  mrg   return (__m512h)
   6361  1.1  mrg     __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
   6362  1.1  mrg 					     (__v32hf) __C,
   6363  1.1  mrg 					     (__v32hf) __D, __B,
   6364  1.1  mrg 					     __E);
   6365  1.1  mrg }
   6366  1.1  mrg 
   6367  1.1  mrg extern __inline __m512h
   6368  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6369  1.1  mrg _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
   6370  1.1  mrg 			       __mmask16 __D, const int __E)
   6371  1.1  mrg {
   6372  1.1  mrg   return (__m512h)
   6373  1.1  mrg     __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
   6374  1.1  mrg 					      (__v32hf) __B,
   6375  1.1  mrg 					      (__v32hf) __C,
   6376  1.1  mrg 					      __D, __E);
   6377  1.1  mrg }
   6378  1.1  mrg 
   6379  1.1  mrg extern __inline __m512h
   6380  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6381  1.1  mrg _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
   6382  1.1  mrg 			       __m512h __D, const int __E)
   6383  1.1  mrg {
   6384  1.1  mrg   return (__m512h)
   6385  1.1  mrg     __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
   6386  1.1  mrg 					      (__v32hf) __C,
   6387  1.1  mrg 					      (__v32hf) __D,
   6388  1.1  mrg 					      __A, __E);
   6389  1.1  mrg }
   6390  1.1  mrg 
   6391  1.1  mrg extern __inline __m512h
   6392  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6393  1.1  mrg _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
   6394  1.1  mrg {
   6395  1.1  mrg   return (__m512h)
   6396  1.1  mrg     __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
   6397  1.1  mrg 				       (__v32hf) __B,
   6398  1.1  mrg 				       (__v32hf) __C,
   6399  1.1  mrg 				       __D);
   6400  1.1  mrg }
   6401  1.1  mrg 
   6402  1.1  mrg extern __inline __m512h
   6403  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6404  1.1  mrg _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
   6405  1.1  mrg 			     __m512h __D, const int __E)
   6406  1.1  mrg {
   6407  1.1  mrg   return (__m512h)
   6408  1.1  mrg     __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
   6409  1.1  mrg 					    (__v32hf) __C,
   6410  1.1  mrg 					    (__v32hf) __D, __B,
   6411  1.1  mrg 					    __E);
   6412  1.1  mrg }
   6413  1.1  mrg 
   6414  1.1  mrg extern __inline __m512h
   6415  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6416  1.1  mrg _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
   6417  1.1  mrg 			      __mmask16 __D, const int __E)
   6418  1.1  mrg {
   6419  1.1  mrg   return (__m512h)
   6420  1.1  mrg     __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
   6421  1.1  mrg 					     (__v32hf) __B,
   6422  1.1  mrg 					     (__v32hf) __C,
   6423  1.1  mrg 					     __D, __E);
   6424  1.1  mrg }
   6425  1.1  mrg 
   6426  1.1  mrg extern __inline __m512h
   6427  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6428  1.1  mrg _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
   6429  1.1  mrg 			      __m512h __D, const int __E)
   6430  1.1  mrg {
   6431  1.1  mrg   return (__m512h)
   6432  1.1  mrg     __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
   6433  1.1  mrg 					     (__v32hf) __C,
   6434  1.1  mrg 					     (__v32hf) __D,
   6435  1.1  mrg 					     __A, __E);
   6436  1.1  mrg }
   6437  1.1  mrg 
   6438  1.1  mrg #else
   6439  1.1  mrg #define _mm512_fcmadd_round_pch(A, B, C, D)			\
   6440  1.1  mrg   (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
   6441  1.1  mrg 
   6442  1.1  mrg #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E)			\
   6443  1.1  mrg   ((__m512h) 								\
   6444  1.1  mrg     __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A),		\
   6445  1.1  mrg 					     (__v32hf) (C),		\
   6446  1.1  mrg 					     (__v32hf) (D),		\
   6447  1.1  mrg 					     (B), (E)))
   6448  1.1  mrg 
   6449  1.1  mrg 
   6450  1.1  mrg #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E)			\
   6451  1.1  mrg   ((__m512h)								\
   6452  1.1  mrg    __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
   6453  1.1  mrg 
   6454  1.1  mrg #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E)			\
   6455  1.1  mrg   (__m512h)								\
   6456  1.1  mrg    __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
   6457  1.1  mrg 
   6458  1.1  mrg #define _mm512_fmadd_round_pch(A, B, C, D)			\
   6459  1.1  mrg   (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
   6460  1.1  mrg 
   6461  1.1  mrg #define _mm512_mask_fmadd_round_pch(A, B, C, D, E)			\
   6462  1.1  mrg   ((__m512h)								\
   6463  1.1  mrg     __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A),		\
   6464  1.1  mrg 					    (__v32hf) (C),		\
   6465  1.1  mrg 					    (__v32hf) (D),		\
   6466  1.1  mrg 					    (B), (E)))
   6467  1.1  mrg 
   6468  1.1  mrg #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E)			\
   6469  1.1  mrg   (__m512h)								\
   6470  1.1  mrg    __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
   6471  1.1  mrg 
   6472  1.1  mrg #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E)			\
   6473  1.1  mrg   (__m512h)								\
   6474  1.1  mrg    __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
   6475  1.1  mrg 
   6476  1.1  mrg #endif /* __OPTIMIZE__ */
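
/* The pch intrinsics treat each adjacent pair of _Float16 elements as one
   complex number, with the real part in the even-indexed element and the
   imaginary part in the odd-indexed one, so a __m512h holds 16 complex
   values and the masks are __mmask16 (one bit per complex pair).  An
   illustrative sketch with hypothetical vectors __a, __b and __acc:

     __m512h __r = _mm512_fmadd_pch (__a, __b, __acc);

   Each complex element of __r is __a[i] * __b[i] + __acc[i] in the
   complex sense.  */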
   6477  1.1  mrg 
   6478  1.1  mrg /* Intrinsics vf[,c]mulcph.  */
   6479  1.1  mrg extern __inline __m512h
   6480  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6481  1.1  mrg _mm512_fcmul_pch (__m512h __A, __m512h __B)
   6482  1.1  mrg {
   6483  1.1  mrg   return (__m512h)
   6484  1.1  mrg     __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
   6485  1.1  mrg 				       (__v32hf) __B,
   6486  1.1  mrg 				       _MM_FROUND_CUR_DIRECTION);
   6487  1.1  mrg }
   6488  1.1  mrg 
   6489  1.1  mrg extern __inline __m512h
   6490  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6491  1.1  mrg _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
   6492  1.1  mrg {
   6493  1.1  mrg   return (__m512h)
   6494  1.1  mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
   6495  1.1  mrg 					    (__v32hf) __D,
   6496  1.1  mrg 					    (__v32hf) __A,
   6497  1.1  mrg 					    __B, _MM_FROUND_CUR_DIRECTION);
   6498  1.1  mrg }
   6499  1.1  mrg 
   6500  1.1  mrg extern __inline __m512h
   6501  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6502  1.1  mrg _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
   6503  1.1  mrg {
   6504  1.1  mrg   return (__m512h)
   6505  1.1  mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
   6506  1.1  mrg 					    (__v32hf) __C,
   6507  1.1  mrg 					    _mm512_setzero_ph (),
   6508  1.1  mrg 					    __A, _MM_FROUND_CUR_DIRECTION);
   6509  1.1  mrg }
   6510  1.1  mrg 
   6511  1.1  mrg extern __inline __m512h
   6512  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6513  1.1  mrg _mm512_fmul_pch (__m512h __A, __m512h __B)
   6514  1.1  mrg {
   6515  1.1  mrg   return (__m512h)
   6516  1.1  mrg     __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
   6517  1.1  mrg 				      (__v32hf) __B,
   6518  1.1  mrg 				      _MM_FROUND_CUR_DIRECTION);
   6519  1.1  mrg }
   6520  1.1  mrg 
   6521  1.1  mrg extern __inline __m512h
   6522  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6523  1.1  mrg _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
   6524  1.1  mrg {
   6525  1.1  mrg   return (__m512h)
   6526  1.1  mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
   6527  1.1  mrg 					   (__v32hf) __D,
   6528  1.1  mrg 					   (__v32hf) __A,
   6529  1.1  mrg 					   __B, _MM_FROUND_CUR_DIRECTION);
   6530  1.1  mrg }
   6531  1.1  mrg 
   6532  1.1  mrg extern __inline __m512h
   6533  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6534  1.1  mrg _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
   6535  1.1  mrg {
   6536  1.1  mrg   return (__m512h)
   6537  1.1  mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
   6538  1.1  mrg 					   (__v32hf) __C,
   6539  1.1  mrg 					   _mm512_setzero_ph (),
   6540  1.1  mrg 					   __A, _MM_FROUND_CUR_DIRECTION);
   6541  1.1  mrg }
   6542  1.1  mrg 
   6543  1.1  mrg #ifdef __OPTIMIZE__
   6544  1.1  mrg extern __inline __m512h
   6545  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6546  1.1  mrg _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
   6547  1.1  mrg {
   6548  1.1  mrg   return (__m512h)
   6549  1.1  mrg     __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
   6550  1.1  mrg 				       (__v32hf) __B, __D);
   6551  1.1  mrg }
   6552  1.1  mrg 
   6553  1.1  mrg extern __inline __m512h
   6554  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6555  1.1  mrg _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
   6556  1.1  mrg 			     __m512h __D, const int __E)
   6557  1.1  mrg {
   6558  1.1  mrg   return (__m512h)
   6559  1.1  mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
   6560  1.1  mrg 					    (__v32hf) __D,
   6561  1.1  mrg 					    (__v32hf) __A,
   6562  1.1  mrg 					    __B, __E);
   6563  1.1  mrg }
   6564  1.1  mrg 
   6565  1.1  mrg extern __inline __m512h
   6566  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6567  1.1  mrg _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
   6568  1.1  mrg 			      __m512h __C, const int __E)
   6569  1.1  mrg {
   6570  1.1  mrg   return (__m512h)
   6571  1.1  mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
   6572  1.1  mrg 					    (__v32hf) __C,
   6573  1.1  mrg 					    _mm512_setzero_ph (),
   6574  1.1  mrg 					    __A, __E);
   6575  1.1  mrg }
   6576  1.1  mrg 
   6577  1.1  mrg extern __inline __m512h
   6578  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6579  1.1  mrg _mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
   6580  1.1  mrg {
   6581  1.1  mrg   return (__m512h)
   6582  1.1  mrg     __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
   6583  1.1  mrg 				      (__v32hf) __B,
   6584  1.1  mrg 				      __D);
   6585  1.1  mrg }
   6586  1.1  mrg 
   6587  1.1  mrg extern __inline __m512h
   6588  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6589  1.1  mrg _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
   6590  1.1  mrg 			    __m512h __D, const int __E)
   6591  1.1  mrg {
   6592  1.1  mrg   return (__m512h)
   6593  1.1  mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
   6594  1.1  mrg 					   (__v32hf) __D,
   6595  1.1  mrg 					   (__v32hf) __A,
   6596  1.1  mrg 					   __B, __E);
   6597  1.1  mrg }
   6598  1.1  mrg 
   6599  1.1  mrg extern __inline __m512h
   6600  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6601  1.1  mrg _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
   6602  1.1  mrg 			     __m512h __C, const int __E)
   6603  1.1  mrg {
   6604  1.1  mrg   return (__m512h)
   6605  1.1  mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
   6606  1.1  mrg 					   (__v32hf) __C,
   6607  1.1  mrg 					   _mm512_setzero_ph (),
   6608  1.1  mrg 					   __A, __E);
   6609  1.1  mrg }
   6610  1.1  mrg 
   6611  1.1  mrg #else
   6612  1.1  mrg #define _mm512_fcmul_round_pch(A, B, D)				\
   6613  1.1  mrg   (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
   6614  1.1  mrg 
   6615  1.1  mrg #define _mm512_mask_fcmul_round_pch(A, B, C, D, E)			\
   6616  1.1  mrg   (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
   6617  1.1  mrg 
   6618  1.1  mrg #define _mm512_maskz_fcmul_round_pch(A, B, C, E)			\
   6619  1.1  mrg   (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C),		\
   6620  1.1  mrg 						    (__v32hf)		\
   6621  1.1  mrg 						    _mm512_setzero_ph (), \
   6622  1.1  mrg 						    (A), (E))
   6623  1.1  mrg 
   6624  1.1  mrg #define _mm512_fmul_round_pch(A, B, D)			\
   6625  1.1  mrg   (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
   6626  1.1  mrg 
   6627  1.1  mrg #define _mm512_mask_fmul_round_pch(A, B, C, D, E)			  \
   6628  1.1  mrg   (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
   6629  1.1  mrg 
   6630  1.1  mrg #define _mm512_maskz_fmul_round_pch(A, B, C, E)				  \
   6631  1.1  mrg   (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C),		  \
   6632  1.1  mrg 						   (__v32hf)		  \
   6633  1.1  mrg 						   _mm512_setzero_ph (),  \
   6634  1.1  mrg 						   (A), (E))
   6635  1.1  mrg 
   6636  1.1  mrg #endif /* __OPTIMIZE__ */
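
/* _mm512_fmul_pch forms ordinary complex products, while
   _mm512_fcmul_pch multiplies each complex element of the first operand
   by the complex conjugate of the corresponding element of the second
   (per Intel's VFCMULCPH documentation).  In the maskz_ forms above,
   masked-out complex pairs are taken from _mm512_setzero_ph (), i.e.
   they become zero.  Sketch with hypothetical vectors __a and __b:

     __m512h __p  = _mm512_fmul_pch  (__a, __b);
     __m512h __cp = _mm512_fcmul_pch (__a, __b);  */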
   6637  1.1  mrg 
   6638  1.1  mrg /* Intrinsics vf[,c]maddcsh.  */
   6639  1.1  mrg extern __inline __m128h
   6640  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6641  1.1  mrg _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   6642  1.1  mrg {
   6643  1.1  mrg   return (__m128h)
   6644  1.1  mrg     __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
   6645  1.1  mrg 					  (__v8hf) __C,
   6646  1.1  mrg 					  (__v8hf) __D, __B,
   6647  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   6648  1.1  mrg }
   6649  1.1  mrg 
   6650  1.1  mrg extern __inline __m128h
   6651  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6652  1.1  mrg _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
   6653  1.1  mrg {
   6654  1.1  mrg   return (__m128h)
   6655  1.1  mrg     __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
   6656  1.1  mrg 					   (__v8hf) __B,
   6657  1.1  mrg 					   (__v8hf) __C, __D,
   6658  1.1  mrg 					   _MM_FROUND_CUR_DIRECTION);
   6659  1.1  mrg }
   6660  1.1  mrg 
   6661  1.1  mrg extern __inline __m128h
   6662  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6663  1.1  mrg _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
   6664  1.1  mrg {
   6665  1.1  mrg   return (__m128h)
   6666  1.1  mrg     __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
   6667  1.1  mrg 					   (__v8hf) __C,
   6668  1.1  mrg 					   (__v8hf) __D,
   6669  1.1  mrg 					   __A, _MM_FROUND_CUR_DIRECTION);
   6670  1.1  mrg }
   6671  1.1  mrg 
   6672  1.1  mrg extern __inline __m128h
   6673  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6674  1.1  mrg _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
   6675  1.1  mrg {
   6676  1.1  mrg   return (__m128h)
   6677  1.1  mrg     __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
   6678  1.1  mrg 				     (__v8hf) __B,
   6679  1.1  mrg 				     (__v8hf) __C,
   6680  1.1  mrg 				     _MM_FROUND_CUR_DIRECTION);
   6681  1.1  mrg }
   6682  1.1  mrg 
   6683  1.1  mrg extern __inline __m128h
   6684  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6685  1.1  mrg _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   6686  1.1  mrg {
   6687  1.1  mrg   return (__m128h)
   6688  1.1  mrg     __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
   6689  1.1  mrg 					 (__v8hf) __C,
   6690  1.1  mrg 					 (__v8hf) __D, __B,
   6691  1.1  mrg 					 _MM_FROUND_CUR_DIRECTION);
   6692  1.1  mrg }
   6693  1.1  mrg 
   6694  1.1  mrg extern __inline __m128h
   6695  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6696  1.1  mrg _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
   6697  1.1  mrg {
   6698  1.1  mrg   return (__m128h)
   6699  1.1  mrg     __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
   6700  1.1  mrg 					  (__v8hf) __B,
   6701  1.1  mrg 					  (__v8hf) __C, __D,
   6702  1.1  mrg 					  _MM_FROUND_CUR_DIRECTION);
   6703  1.1  mrg }
   6704  1.1  mrg 
   6705  1.1  mrg extern __inline __m128h
   6706  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6707  1.1  mrg _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
   6708  1.1  mrg {
   6709  1.1  mrg   return (__m128h)
   6710  1.1  mrg     __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
   6711  1.1  mrg 					  (__v8hf) __C,
   6712  1.1  mrg 					  (__v8hf) __D,
   6713  1.1  mrg 					  __A, _MM_FROUND_CUR_DIRECTION);
   6714  1.1  mrg }
   6715  1.1  mrg 
   6716  1.1  mrg extern __inline __m128h
   6717  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6718  1.1  mrg _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
   6719  1.1  mrg {
   6720  1.1  mrg   return (__m128h)
   6721  1.1  mrg     __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
   6722  1.1  mrg 				    (__v8hf) __B,
   6723  1.1  mrg 				    (__v8hf) __C,
   6724  1.1  mrg 				    _MM_FROUND_CUR_DIRECTION);
   6725  1.1  mrg }
   6726  1.1  mrg 
   6727  1.1  mrg #ifdef __OPTIMIZE__
   6728  1.1  mrg extern __inline __m128h
   6729  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6730  1.1  mrg _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
   6731  1.1  mrg 			   __m128h __D, const int __E)
   6732  1.1  mrg {
   6733  1.1  mrg   return (__m128h)
   6734  1.1  mrg     __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
   6735  1.1  mrg 					  (__v8hf) __C,
   6736  1.1  mrg 					  (__v8hf) __D,
   6737  1.1  mrg 					  __B, __E);
   6738  1.1  mrg }
   6739  1.1  mrg 
   6740  1.1  mrg extern __inline __m128h
   6741  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6742  1.1  mrg _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
   6743  1.1  mrg 			    __mmask8 __D, const int __E)
   6744  1.1  mrg {
   6745  1.1  mrg   return (__m128h)
   6746  1.1  mrg     __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
   6747  1.1  mrg 					   (__v8hf) __B,
   6748  1.1  mrg 					   (__v8hf) __C,
   6749  1.1  mrg 					   __D, __E);
   6750  1.1  mrg }
   6751  1.1  mrg 
   6752  1.1  mrg extern __inline __m128h
   6753  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6754  1.1  mrg _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
   6755  1.1  mrg 			    __m128h __D, const int __E)
   6756  1.1  mrg {
   6757  1.1  mrg   return (__m128h)
   6758  1.1  mrg     __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
   6759  1.1  mrg 					   (__v8hf) __C,
   6760  1.1  mrg 					   (__v8hf) __D,
   6761  1.1  mrg 					   __A, __E);
   6762  1.1  mrg }
   6763  1.1  mrg 
   6764  1.1  mrg extern __inline __m128h
   6765  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6766  1.1  mrg _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
   6767  1.1  mrg {
   6768  1.1  mrg   return (__m128h)
   6769  1.1  mrg     __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
   6770  1.1  mrg 				     (__v8hf) __B,
   6771  1.1  mrg 				     (__v8hf) __C,
   6772  1.1  mrg 				     __D);
   6773  1.1  mrg }
   6774  1.1  mrg 
   6775  1.1  mrg extern __inline __m128h
   6776  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6777  1.1  mrg _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
   6778  1.1  mrg 			  __m128h __D, const int __E)
   6779  1.1  mrg {
   6780  1.1  mrg   return (__m128h)
   6781  1.1  mrg     __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
   6782  1.1  mrg 					 (__v8hf) __C,
   6783  1.1  mrg 					 (__v8hf) __D,
   6784  1.1  mrg 					 __B, __E);
   6785  1.1  mrg }
   6786  1.1  mrg 
   6787  1.1  mrg extern __inline __m128h
   6788  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6789  1.1  mrg _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
   6790  1.1  mrg 			   __mmask8 __D, const int __E)
   6791  1.1  mrg {
   6792  1.1  mrg   return (__m128h)
   6793  1.1  mrg     __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
   6794  1.1  mrg 					  (__v8hf) __B,
   6795  1.1  mrg 					  (__v8hf) __C,
   6796  1.1  mrg 					  __D, __E);
   6797  1.1  mrg }
   6798  1.1  mrg 
   6799  1.1  mrg extern __inline __m128h
   6800  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6801  1.1  mrg _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
   6802  1.1  mrg 			   __m128h __D, const int __E)
   6803  1.1  mrg {
   6804  1.1  mrg   return (__m128h)
   6805  1.1  mrg     __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
   6806  1.1  mrg 					  (__v8hf) __C,
   6807  1.1  mrg 					  (__v8hf) __D,
   6808  1.1  mrg 					  __A, __E);
   6809  1.1  mrg }
   6810  1.1  mrg 
   6811  1.1  mrg extern __inline __m128h
   6812  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6813  1.1  mrg _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
   6814  1.1  mrg {
   6815  1.1  mrg   return (__m128h)
   6816  1.1  mrg     __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
   6817  1.1  mrg 				    (__v8hf) __B,
   6818  1.1  mrg 				    (__v8hf) __C,
   6819  1.1  mrg 				    __D);
   6820  1.1  mrg }
   6821  1.1  mrg #else
   6822  1.1  mrg #define _mm_mask_fcmadd_round_sch(A, B, C, D, E)			\
   6823  1.1  mrg     ((__m128h)								\
   6824  1.1  mrg      __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A),		\
   6825  1.1  mrg 					   (__v8hf) (C),		\
   6826  1.1  mrg 					   (__v8hf) (D),		\
   6827  1.1  mrg 					   (B), (E)))
   6828  1.1  mrg 
   6829  1.1  mrg 
   6830  1.1  mrg #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E)			\
   6831  1.1  mrg   ((__m128h)								\
   6832  1.1  mrg    __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A),		\
   6833  1.1  mrg 					  (__v8hf) (B),		\
   6834  1.1  mrg 					  (__v8hf) (C),		\
   6835  1.1  mrg 					  (D), (E)))
   6836  1.1  mrg 
    6837  1.1  mrg #define _mm_maskz_fcmadd_round_sch(A, B, C, D, E)		\
    6838  1.1  mrg   ((__m128h) __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E)))
    6839  1.1  mrg 
    6840  1.1  mrg #define _mm_fcmadd_round_sch(A, B, C, D)		\
    6841  1.1  mrg   ((__m128h) __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D)))
   6842  1.1  mrg 
   6843  1.1  mrg #define _mm_mask_fmadd_round_sch(A, B, C, D, E)				\
   6844  1.1  mrg     ((__m128h)								\
   6845  1.1  mrg      __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A),		\
   6846  1.1  mrg 					  (__v8hf) (C),		\
   6847  1.1  mrg 					  (__v8hf) (D),		\
   6848  1.1  mrg 					  (B), (E)))
   6849  1.1  mrg 
   6850  1.1  mrg #define _mm_mask3_fmadd_round_sch(A, B, C, D, E)			\
   6851  1.1  mrg   ((__m128h)								\
   6852  1.1  mrg    __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A),		\
   6853  1.1  mrg 					 (__v8hf) (B),		\
   6854  1.1  mrg 					 (__v8hf) (C),		\
   6855  1.1  mrg 					 (D), (E)))
   6856  1.1  mrg 
    6857  1.1  mrg #define _mm_maskz_fmadd_round_sch(A, B, C, D, E)		\
    6858  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E)))
    6859  1.1  mrg 
    6860  1.1  mrg #define _mm_fmadd_round_sch(A, B, C, D)		\
    6861  1.1  mrg   ((__m128h) __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D)))
   6862  1.1  mrg 
   6863  1.1  mrg #endif /* __OPTIMIZE__ */
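
/* The sch intrinsics operate on a single complex value held in the low
   32 bits of the vectors (one real/imaginary _Float16 pair), and only
   bit 0 of the __mmask8 argument is significant in the masked forms.
   An illustrative sketch with hypothetical inputs:

     extern __m128h __a, __b, __acc;
     __m128h __r  = _mm_fmadd_sch  (__a, __b, __acc);
     __m128h __rc = _mm_fcmadd_sch (__a, __b, __acc);

   __r accumulates __a[0] * __b[0] into __acc[0]; __rc does the same but
   multiplies by the complex conjugate of the low element of __b.  */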
   6864  1.1  mrg 
   6865  1.1  mrg /* Intrinsics vf[,c]mulcsh.  */
   6866  1.1  mrg extern __inline __m128h
   6867  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6868  1.1  mrg _mm_fcmul_sch (__m128h __A, __m128h __B)
   6869  1.1  mrg {
   6870  1.1  mrg   return (__m128h)
   6871  1.1  mrg     __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
   6872  1.1  mrg 				    (__v8hf) __B,
   6873  1.1  mrg 				    _MM_FROUND_CUR_DIRECTION);
   6874  1.1  mrg }
   6875  1.1  mrg 
   6876  1.1  mrg extern __inline __m128h
   6877  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6878  1.1  mrg _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   6879  1.1  mrg {
   6880  1.1  mrg   return (__m128h)
   6881  1.1  mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
   6882  1.1  mrg 					 (__v8hf) __D,
   6883  1.1  mrg 					 (__v8hf) __A,
   6884  1.1  mrg 					 __B, _MM_FROUND_CUR_DIRECTION);
   6885  1.1  mrg }
   6886  1.1  mrg 
   6887  1.1  mrg extern __inline __m128h
   6888  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6889  1.1  mrg _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
   6890  1.1  mrg {
   6891  1.1  mrg   return (__m128h)
   6892  1.1  mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
   6893  1.1  mrg 					 (__v8hf) __C,
   6894  1.1  mrg 					 _mm_setzero_ph (),
   6895  1.1  mrg 					 __A, _MM_FROUND_CUR_DIRECTION);
   6896  1.1  mrg }
   6897  1.1  mrg 
   6898  1.1  mrg extern __inline __m128h
   6899  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6900  1.1  mrg _mm_fmul_sch (__m128h __A, __m128h __B)
   6901  1.1  mrg {
   6902  1.1  mrg   return (__m128h)
   6903  1.1  mrg     __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
   6904  1.1  mrg 				   (__v8hf) __B,
   6905  1.1  mrg 				   _MM_FROUND_CUR_DIRECTION);
   6906  1.1  mrg }
   6907  1.1  mrg 
   6908  1.1  mrg extern __inline __m128h
   6909  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6910  1.1  mrg _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
   6911  1.1  mrg {
   6912  1.1  mrg   return (__m128h)
   6913  1.1  mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
   6914  1.1  mrg 					(__v8hf) __D,
   6915  1.1  mrg 					(__v8hf) __A,
   6916  1.1  mrg 					__B, _MM_FROUND_CUR_DIRECTION);
   6917  1.1  mrg }
   6918  1.1  mrg 
   6919  1.1  mrg extern __inline __m128h
   6920  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6921  1.1  mrg _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
   6922  1.1  mrg {
   6923  1.1  mrg   return (__m128h)
   6924  1.1  mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
   6925  1.1  mrg 					(__v8hf) __C,
   6926  1.1  mrg 					_mm_setzero_ph (),
   6927  1.1  mrg 					__A, _MM_FROUND_CUR_DIRECTION);
   6928  1.1  mrg }
   6929  1.1  mrg 
   6930  1.1  mrg #ifdef __OPTIMIZE__
   6931  1.1  mrg extern __inline __m128h
   6932  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6933  1.1  mrg _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
   6934  1.1  mrg {
   6935  1.1  mrg   return (__m128h)
   6936  1.1  mrg     __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
   6937  1.1  mrg 				    (__v8hf) __B,
   6938  1.1  mrg 				    __D);
   6939  1.1  mrg }
   6940  1.1  mrg 
   6941  1.1  mrg extern __inline __m128h
   6942  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6943  1.1  mrg _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
   6944  1.1  mrg 			  __m128h __D, const int __E)
   6945  1.1  mrg {
   6946  1.1  mrg   return (__m128h)
   6947  1.1  mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
   6948  1.1  mrg 					 (__v8hf) __D,
   6949  1.1  mrg 					 (__v8hf) __A,
   6950  1.1  mrg 					 __B, __E);
   6951  1.1  mrg }
   6952  1.1  mrg 
   6953  1.1  mrg extern __inline __m128h
   6954  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6955  1.1  mrg _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
   6956  1.1  mrg 			   const int __E)
   6957  1.1  mrg {
   6958  1.1  mrg   return (__m128h)
   6959  1.1  mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
   6960  1.1  mrg 					 (__v8hf) __C,
   6961  1.1  mrg 					 _mm_setzero_ph (),
   6962  1.1  mrg 					 __A, __E);
   6963  1.1  mrg }
   6964  1.1  mrg 
   6965  1.1  mrg extern __inline __m128h
   6966  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6967  1.1  mrg _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
   6968  1.1  mrg {
   6969  1.1  mrg   return (__m128h)
   6970  1.1  mrg     __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
   6971  1.1  mrg 				   (__v8hf) __B, __D);
   6972  1.1  mrg }
   6973  1.1  mrg 
   6974  1.1  mrg extern __inline __m128h
   6975  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6976  1.1  mrg _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
   6977  1.1  mrg 			 __m128h __D, const int __E)
   6978  1.1  mrg {
   6979  1.1  mrg   return (__m128h)
   6980  1.1  mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
   6981  1.1  mrg 					(__v8hf) __D,
   6982  1.1  mrg 					(__v8hf) __A,
   6983  1.1  mrg 					__B, __E);
   6984  1.1  mrg }
   6985  1.1  mrg 
   6986  1.1  mrg extern __inline __m128h
   6987  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   6988  1.1  mrg _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
   6989  1.1  mrg {
   6990  1.1  mrg   return (__m128h)
   6991  1.1  mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
   6992  1.1  mrg 					(__v8hf) __C,
   6993  1.1  mrg 					_mm_setzero_ph (),
   6994  1.1  mrg 					__A, __E);
   6995  1.1  mrg }
   6996  1.1  mrg 
   6997  1.1  mrg #else
    6998  1.1  mrg #define _mm_fcmul_round_sch(__A, __B, __D)				\
    6999  1.1  mrg   (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) (__A),		\
    7000  1.1  mrg 					    (__v8hf) (__B), (__D))
    7001  1.1  mrg 
    7002  1.1  mrg #define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E)		\
    7003  1.1  mrg   (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) (__C),	\
    7004  1.1  mrg 						 (__v8hf) (__D),	\
    7005  1.1  mrg 						 (__v8hf) (__A),	\
    7006  1.1  mrg 						 (__B), (__E))
    7007  1.1  mrg 
    7008  1.1  mrg #define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E)			\
    7009  1.1  mrg   (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) (__B),	\
    7010  1.1  mrg 						 (__v8hf) (__C),	\
    7011  1.1  mrg 						 _mm_setzero_ph (),	\
    7012  1.1  mrg 						 (__A), (__E))
    7013  1.1  mrg 
    7014  1.1  mrg #define _mm_fmul_round_sch(__A, __B, __D)				\
    7015  1.1  mrg   (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) (__A),		\
    7016  1.1  mrg 					   (__v8hf) (__B), (__D))
    7017  1.1  mrg 
    7018  1.1  mrg #define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E)		\
    7019  1.1  mrg   (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) (__C),	\
    7020  1.1  mrg 						(__v8hf) (__D),		\
    7021  1.1  mrg 						(__v8hf) (__A),		\
    7022  1.1  mrg 						(__B), (__E))
    7023  1.1  mrg 
    7024  1.1  mrg #define _mm_maskz_fmul_round_sch(__A, __B, __C, __E)			\
    7025  1.1  mrg   (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) (__B),	\
    7026  1.1  mrg 						(__v8hf) (__C),		\
    7027  1.1  mrg 						_mm_setzero_ph (),	\
    7028  1.1  mrg 						(__A), (__E))
   7029  1.1  mrg 
   7030  1.1  mrg #endif /* __OPTIMIZE__ */
   7031  1.1  mrg 
   7032  1.1  mrg #define _MM512_REDUCE_OP(op)						\
   7033  1.1  mrg   __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0);	\
   7034  1.1  mrg   __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1);	\
   7035  1.1  mrg   __m256h __T3 = (__T1 op __T2);					\
   7036  1.1  mrg   __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0);	\
   7037  1.1  mrg   __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1);	\
   7038  1.1  mrg   __m128h __T6 = (__T4 op __T5);					\
   7039  1.1  mrg   __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6,		\
   7040  1.1  mrg 		 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 });			\
   7041  1.1  mrg   __m128h __T8 = (__T6 op __T7);					\
   7042  1.1  mrg   __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8,		\
   7043  1.1  mrg 		 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 });			\
   7044  1.1  mrg   __m128h __T10 = __T8 op __T9;					\
   7045  1.1  mrg   return __T10[0] op __T10[1]
   7046  1.1  mrg 
    7047  1.1  mrg /* Intrinsics for horizontal reductions over all _Float16 elements.  */
   7048  1.1  mrg extern __inline _Float16
   7049  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7050  1.1  mrg _mm512_reduce_add_ph (__m512h __A)
   7051  1.1  mrg {
   7052  1.1  mrg    _MM512_REDUCE_OP (+);
   7053  1.1  mrg }
   7054  1.1  mrg 
   7055  1.1  mrg extern __inline _Float16
   7056  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7057  1.1  mrg _mm512_reduce_mul_ph (__m512h __A)
   7058  1.1  mrg {
   7059  1.1  mrg    _MM512_REDUCE_OP (*);
   7060  1.1  mrg }
   7061  1.1  mrg 
   7062  1.1  mrg #undef _MM512_REDUCE_OP
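
/* The reduction macro above folds the vector in halves (512 -> 256 ->
   128 bits, then within the 128-bit lane), so the 32 elements are
   combined in a tree order rather than sequentially.  Illustrative
   sketch with hypothetical values:

     __m512h __v = _mm512_set1_ph (1.0);
     _Float16 __s = _mm512_reduce_add_ph (__v);

   Here __s is exactly 32.0; for general inputs the result can differ in
   the last bit from a left-to-right summation, because floating-point
   addition is not associative.  */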
   7063  1.1  mrg 
   7064  1.1  mrg #ifdef __AVX512VL__
   7065  1.1  mrg 
   7066  1.1  mrg #define _MM512_REDUCE_OP(op)						\
   7067  1.1  mrg   __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0);	\
   7068  1.1  mrg   __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1);	\
   7069  1.1  mrg   __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2,		\
   7070  1.1  mrg 		 _mm256_setzero_ph (), (__mmask16) -1);		\
   7071  1.1  mrg   __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0);	\
   7072  1.1  mrg   __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1);	\
   7073  1.1  mrg   __m128h __T6 = __builtin_ia32_##op##ph128_mask			\
   7074  1.1  mrg 		 (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1);	\
   7075  1.1  mrg   __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6,		\
   7076  1.1  mrg 		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 });			\
   7077  1.1  mrg   __m128h __T8 = (__m128h)  __builtin_ia32_##op##ph128_mask		\
   7078  1.1  mrg 		 (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1);	\
   7079  1.1  mrg   __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8,		\
   7080  1.1  mrg 		 (__v8hi) { 4, 5 });					\
   7081  1.1  mrg   __m128h __T10 = __builtin_ia32_##op##ph128_mask			\
   7082  1.1  mrg 		  (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1);	\
   7083  1.1  mrg   __m128h __T11 = (__m128h) __builtin_shuffle (__T10,			\
   7084  1.1  mrg 		  (__v8hi) { 1, 0 });					\
   7085  1.1  mrg   __m128h __T12 = __builtin_ia32_##op##ph128_mask			\
   7086  1.1  mrg 		  (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1);	\
   7087  1.1  mrg   return __T12[0]
   7088  1.1  mrg 
   7089  1.1  mrg #else
   7090  1.1  mrg 
   7091  1.1  mrg #define _MM512_REDUCE_OP(op)						\
   7092  1.1  mrg   __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A,		\
   7093  1.1  mrg 		 (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 });			\
   7094  1.1  mrg   __m512h __T2 = _mm512_##op##_ph (__A, __T1);				\
   7095  1.1  mrg   __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2,		\
   7096  1.1  mrg 		 (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 });			\
   7097  1.1  mrg   __m512h __T4 = _mm512_##op##_ph (__T2, __T3);			\
   7098  1.1  mrg   __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4,		\
   7099  1.1  mrg 		 (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 });			\
   7100  1.1  mrg   __m512h __T6 = _mm512_##op##_ph (__T4, __T5);			\
   7101  1.1  mrg   __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6,		\
   7102  1.1  mrg 		 (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0,			\
   7103  1.1  mrg 			     0, 0, 0, 0, 0, 0, 0, 0 });		\
   7104  1.1  mrg   __m512h __T8 = _mm512_##op##_ph (__T6, __T7);			\
   7105  1.1  mrg   __m512h __T9 = (__m512h) __builtin_shuffle (__T8,			\
   7106  1.1  mrg 		 (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0,			\
   7107  1.1  mrg 			     0, 0, 0, 0, 0, 0, 0, 0,			\
   7108  1.1  mrg 			     0, 0, 0, 0, 0, 0, 0, 0,			\
   7109  1.1  mrg 			     0, 0, 0, 0, 0, 0, 0, 0 });		\
   7110  1.1  mrg   __m512h __T10 = _mm512_##op##_ph (__T8, __T9);			\
   7111  1.1  mrg   return __T10[0]
   7112  1.1  mrg #endif
   7113  1.1  mrg 
   7114  1.1  mrg extern __inline _Float16
   7115  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7116  1.1  mrg _mm512_reduce_min_ph (__m512h __A)
   7117  1.1  mrg {
   7118  1.1  mrg   _MM512_REDUCE_OP (min);
   7119  1.1  mrg }
   7120  1.1  mrg 
   7121  1.1  mrg extern __inline _Float16
   7122  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7123  1.1  mrg _mm512_reduce_max_ph (__m512h __A)
   7124  1.1  mrg {
   7125  1.1  mrg   _MM512_REDUCE_OP (max);
   7126  1.1  mrg }
   7127  1.1  mrg 
   7128  1.1  mrg #undef _MM512_REDUCE_OP
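
/* Illustrative sketch: computing the maximum of 32 half-precision values
   taken from memory (the buffer name is hypothetical):

     extern _Float16 __buf[32];
     __m512h  __v = _mm512_loadu_ph (__buf);
     _Float16 __m = _mm512_reduce_max_ph (__v);  */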
   7129  1.1  mrg 
   7130  1.1  mrg extern __inline __m512h
   7131  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7132  1.1  mrg _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
   7133  1.1  mrg {
   7134  1.1  mrg   return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
   7135  1.1  mrg 						    (__v32hi) __A,
   7136  1.1  mrg 						    (__mmask32) __U);
   7137  1.1  mrg 
   7138  1.1  mrg }
   7139  1.1  mrg 
   7140  1.1  mrg extern __inline __m512h
   7141  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7142  1.1  mrg _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
   7143  1.1  mrg {
   7144  1.1  mrg   return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
   7145  1.1  mrg 						       (__v32hi) __I,
   7146  1.1  mrg 						       (__v32hi) __B,
   7147  1.1  mrg 						       (__mmask32)-1);
   7148  1.1  mrg }
   7149  1.1  mrg 
   7150  1.1  mrg extern __inline __m512h
   7151  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7152  1.1  mrg _mm512_permutexvar_ph (__m512i __A, __m512h __B)
   7153  1.1  mrg {
   7154  1.1  mrg   return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
   7155  1.1  mrg 						     (__v32hi) __A,
   7156  1.1  mrg 						     (__v32hi)
   7157  1.1  mrg 						     (_mm512_setzero_ph ()),
   7158  1.1  mrg 						     (__mmask32)-1);
   7159  1.1  mrg }
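
/* Illustrative sketch (variable names are hypothetical):
   _mm512_permutexvar_ph selects elements of the data vector by the low
   5 bits of each 16-bit index, so equal indices broadcast one lane;
   _mm512_permutex2var_ph works the same way across two data vectors,
   with bit 5 of each index choosing between them.

     __m512i idx = _mm512_set1_epi16 (5);
     __m512h r = _mm512_permutexvar_ph (idx, b);	// every lane = b[5]
 */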
   7160  1.1  mrg 
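/* A _Float16 _Complex value occupies 32 bits, real part in the low
   half and imaginary part in the high half, so it can be type-punned
   through the union below to a float and broadcast with
   _mm512_set1_ps, yielding 16 identical complex half-precision
   elements.  */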
   7161  1.1  mrg extern __inline __m512h
   7162  1.1  mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
   7163  1.1  mrg _mm512_set1_pch (_Float16 _Complex __A)
   7164  1.1  mrg {
   7165  1.1  mrg   union
   7166  1.1  mrg   {
   7167  1.1  mrg     _Float16 _Complex __a;
   7168  1.1  mrg     float __b;
    7169  1.1  mrg   } __u = { .__a = __A };
   7170  1.1  mrg 
   7171  1.1  mrg   return (__m512h) _mm512_set1_ps (__u.__b);
   7172  1.1  mrg }
   7173  1.1  mrg 
    7174  1.1  mrg /* Intrinsics below are aliases for f*mul_*ch; see the sketch after them.  */
   7175  1.1  mrg #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
   7176  1.1  mrg #define _mm512_mask_mul_pch(W, U, A, B)				      \
   7177  1.1  mrg   _mm512_mask_fmul_pch ((W), (U), (A), (B))
   7178  1.1  mrg #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
   7179  1.1  mrg #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
   7180  1.1  mrg #define _mm512_mask_mul_round_pch(W, U, A, B, R)		      \
   7181  1.1  mrg   _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
   7182  1.1  mrg #define _mm512_maskz_mul_round_pch(U, A, B, R)			      \
   7183  1.1  mrg   _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
   7184  1.1  mrg 
   7185  1.1  mrg #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
   7186  1.1  mrg #define _mm512_mask_cmul_pch(W, U, A, B)			      \
   7187  1.1  mrg   _mm512_mask_fcmul_pch ((W), (U), (A), (B))
   7188  1.1  mrg #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
   7189  1.1  mrg #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
   7190  1.1  mrg #define _mm512_mask_cmul_round_pch(W, U, A, B, R)		      \
   7191  1.1  mrg   _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
   7192  1.1  mrg #define _mm512_maskz_cmul_round_pch(U, A, B, R)			      \
   7193  1.1  mrg   _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
   7194  1.1  mrg 
   7195  1.1  mrg #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
   7196  1.1  mrg #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
   7197  1.1  mrg #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
   7198  1.1  mrg #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
   7199  1.1  mrg #define _mm_mask_mul_round_sch(W, U, A, B, R)			      \
   7200  1.1  mrg   _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
   7201  1.1  mrg #define _mm_maskz_mul_round_sch(U, A, B, R)			      \
   7202  1.1  mrg   _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
   7203  1.1  mrg 
   7204  1.1  mrg #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
   7205  1.1  mrg #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
   7206  1.1  mrg #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
   7207  1.1  mrg #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
   7208  1.1  mrg #define _mm_mask_cmul_round_sch(W, U, A, B, R)			      \
   7209  1.1  mrg   _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
   7210  1.1  mrg #define _mm_maskz_cmul_round_sch(U, A, B, R)			      \
   7211  1.1  mrg   _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
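
/* Illustrative sketch (variable names are hypothetical): with the
   aliases above a complex half-precision multiply can be written
   without the 'f' prefix; the _cmul_ forms expand to the conjugating
   _fcmul_ variants in the same way.

     __m512h prod = _mm512_mul_pch (x, y);	// _mm512_fmul_pch (x, y)
 */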
   7212  1.1  mrg 
   7213  1.1  mrg #ifdef __DISABLE_AVX512FP16__
   7214  1.1  mrg #undef __DISABLE_AVX512FP16__
   7215  1.1  mrg #pragma GCC pop_options
   7216  1.1  mrg #endif /* __DISABLE_AVX512FP16__ */
   7217  1.1  mrg 
   7218  1.1  mrg #endif /* __AVX512FP16INTRIN_H_INCLUDED */
   7219