Home | History | Annotate | Line # | Download | only in mips
      1      1.1  mrg /* Intrinsics for Loongson MultiMedia extension Instructions operations.
      2      1.1  mrg 
      3  1.1.1.3  mrg    Copyright (C) 2008-2022 Free Software Foundation, Inc.
      4      1.1  mrg    Contributed by CodeSourcery.
      5      1.1  mrg 
      6      1.1  mrg    This file is part of GCC.
      7      1.1  mrg 
      8      1.1  mrg    GCC is free software; you can redistribute it and/or modify it
      9      1.1  mrg    under the terms of the GNU General Public License as published
     10      1.1  mrg    by the Free Software Foundation; either version 3, or (at your
     11      1.1  mrg    option) any later version.
     12      1.1  mrg 
     13      1.1  mrg    GCC is distributed in the hope that it will be useful, but WITHOUT
     14      1.1  mrg    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15      1.1  mrg    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     16      1.1  mrg    License for more details.
     17      1.1  mrg 
     18      1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     19      1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     20      1.1  mrg    3.1, as published by the Free Software Foundation.
     21      1.1  mrg 
     22      1.1  mrg    You should have received a copy of the GNU General Public License and
     23      1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     24      1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     25      1.1  mrg    <http://www.gnu.org/licenses/>.  */
     26      1.1  mrg 
     27      1.1  mrg #ifndef _GCC_LOONGSON_MMIINTRIN_H
     28      1.1  mrg #define _GCC_LOONGSON_MMIINTRIN_H
     29      1.1  mrg 
     30      1.1  mrg #if !defined(__mips_loongson_mmi)
     31      1.1  mrg # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
     32      1.1  mrg  loongson-mmiintrin.h
     33      1.1  mrg #endif
     34      1.1  mrg 
     35      1.1  mrg #ifdef __cplusplus
     36      1.1  mrg extern "C" {
     37      1.1  mrg #endif
     38      1.1  mrg 
     39      1.1  mrg #include <stdint.h>
     40      1.1  mrg 
/* 8-byte vectors of unsigned bytes, halfwords and words.
   The attribute uses the reserved __vector_size__ spelling so that a user
   macro named vector_size cannot break this header.  */
typedef uint8_t uint8x8_t __attribute__ ((__vector_size__ (8)));
typedef uint16_t uint16x4_t __attribute__ ((__vector_size__ (8)));
typedef uint32_t uint32x2_t __attribute__ ((__vector_size__ (8)));
     45      1.1  mrg 
/* 8-byte vectors of signed bytes, halfwords and words.
   The attribute uses the reserved __vector_size__ spelling so that a user
   macro named vector_size cannot break this header.  */
typedef int8_t int8x8_t __attribute__ ((__vector_size__ (8)));
typedef int16_t int16x4_t __attribute__ ((__vector_size__ (8)));
typedef int32_t int32x2_t __attribute__ ((__vector_size__ (8)));
     50      1.1  mrg 
     51      1.1  mrg /* SIMD intrinsics.
     52      1.1  mrg    Unless otherwise noted, calls to the functions below will expand into
     53      1.1  mrg    precisely one machine instruction, modulo any moves required to
     54      1.1  mrg    satisfy register allocation constraints.  */
     55      1.1  mrg 
     56      1.1  mrg /* Pack with signed saturation.  */
     57      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     58      1.1  mrg packsswh (int32x2_t s, int32x2_t t)
     59      1.1  mrg {
     60      1.1  mrg   return __builtin_loongson_packsswh (s, t);
     61      1.1  mrg }
     62      1.1  mrg 
     63      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     64      1.1  mrg packsshb (int16x4_t s, int16x4_t t)
     65      1.1  mrg {
     66      1.1  mrg   return __builtin_loongson_packsshb (s, t);
     67      1.1  mrg }
     68      1.1  mrg 
     69      1.1  mrg /* Pack with unsigned saturation.  */
     70      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     71      1.1  mrg packushb (uint16x4_t s, uint16x4_t t)
     72      1.1  mrg {
     73      1.1  mrg   return __builtin_loongson_packushb (s, t);
     74      1.1  mrg }
     75      1.1  mrg 
     76      1.1  mrg /* Vector addition, treating overflow by wraparound.  */
     77      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     78      1.1  mrg paddw_u (uint32x2_t s, uint32x2_t t)
     79      1.1  mrg {
     80      1.1  mrg   return __builtin_loongson_paddw_u (s, t);
     81      1.1  mrg }
     82      1.1  mrg 
     83      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     84      1.1  mrg paddh_u (uint16x4_t s, uint16x4_t t)
     85      1.1  mrg {
     86      1.1  mrg   return __builtin_loongson_paddh_u (s, t);
     87      1.1  mrg }
     88      1.1  mrg 
     89      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     90      1.1  mrg paddb_u (uint8x8_t s, uint8x8_t t)
     91      1.1  mrg {
     92      1.1  mrg   return __builtin_loongson_paddb_u (s, t);
     93      1.1  mrg }
     94      1.1  mrg 
     95      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     96      1.1  mrg paddw_s (int32x2_t s, int32x2_t t)
     97      1.1  mrg {
     98      1.1  mrg   return __builtin_loongson_paddw_s (s, t);
     99      1.1  mrg }
    100      1.1  mrg 
    101      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    102      1.1  mrg paddh_s (int16x4_t s, int16x4_t t)
    103      1.1  mrg {
    104      1.1  mrg   return __builtin_loongson_paddh_s (s, t);
    105      1.1  mrg }
    106      1.1  mrg 
    107      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    108      1.1  mrg paddb_s (int8x8_t s, int8x8_t t)
    109      1.1  mrg {
    110      1.1  mrg   return __builtin_loongson_paddb_s (s, t);
    111      1.1  mrg }
    112      1.1  mrg 
    113      1.1  mrg /* Addition of doubleword integers, treating overflow by wraparound.  */
/* Wraparound addition of two uint64_t doublewords; S and T are passed
   straight to __builtin_loongson_paddd_u.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
paddd_u (uint64_t s, uint64_t t)
{
  const uint64_t sum = __builtin_loongson_paddd_u (s, t);

  return sum;
}
    119      1.1  mrg 
/* Wraparound addition of two int64_t doublewords; S and T are passed
   straight to __builtin_loongson_paddd_s.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
paddd_s (int64_t s, int64_t t)
{
  const int64_t sum = __builtin_loongson_paddd_s (s, t);

  return sum;
}
    125      1.1  mrg 
    126      1.1  mrg /* Vector addition, treating overflow by signed saturation.  */
    127      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    128      1.1  mrg paddsh (int16x4_t s, int16x4_t t)
    129      1.1  mrg {
    130      1.1  mrg   return __builtin_loongson_paddsh (s, t);
    131      1.1  mrg }
    132      1.1  mrg 
    133      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    134      1.1  mrg paddsb (int8x8_t s, int8x8_t t)
    135      1.1  mrg {
    136      1.1  mrg   return __builtin_loongson_paddsb (s, t);
    137      1.1  mrg }
    138      1.1  mrg 
    139      1.1  mrg /* Vector addition, treating overflow by unsigned saturation.  */
    140      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    141      1.1  mrg paddush (uint16x4_t s, uint16x4_t t)
    142      1.1  mrg {
    143      1.1  mrg   return __builtin_loongson_paddush (s, t);
    144      1.1  mrg }
    145      1.1  mrg 
    146      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    147      1.1  mrg paddusb (uint8x8_t s, uint8x8_t t)
    148      1.1  mrg {
    149      1.1  mrg   return __builtin_loongson_paddusb (s, t);
    150      1.1  mrg }
    151      1.1  mrg 
    152      1.1  mrg /* Logical AND NOT.  */
/* Logical AND NOT on uint64_t operands; S and T are forwarded in that
   order to __builtin_loongson_pandn_ud.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
pandn_ud (uint64_t s, uint64_t t)
{
  const uint64_t masked = __builtin_loongson_pandn_ud (s, t);

  return masked;
}
    158      1.1  mrg 
    159      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    160      1.1  mrg pandn_uw (uint32x2_t s, uint32x2_t t)
    161      1.1  mrg {
    162      1.1  mrg   return __builtin_loongson_pandn_uw (s, t);
    163      1.1  mrg }
    164      1.1  mrg 
    165      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    166      1.1  mrg pandn_uh (uint16x4_t s, uint16x4_t t)
    167      1.1  mrg {
    168      1.1  mrg   return __builtin_loongson_pandn_uh (s, t);
    169      1.1  mrg }
    170      1.1  mrg 
    171      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    172      1.1  mrg pandn_ub (uint8x8_t s, uint8x8_t t)
    173      1.1  mrg {
    174      1.1  mrg   return __builtin_loongson_pandn_ub (s, t);
    175      1.1  mrg }
    176      1.1  mrg 
/* Logical AND NOT on int64_t operands; S and T are forwarded in that
   order to __builtin_loongson_pandn_sd.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pandn_sd (int64_t s, int64_t t)
{
  const int64_t masked = __builtin_loongson_pandn_sd (s, t);

  return masked;
}
    182      1.1  mrg 
    183      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    184      1.1  mrg pandn_sw (int32x2_t s, int32x2_t t)
    185      1.1  mrg {
    186      1.1  mrg   return __builtin_loongson_pandn_sw (s, t);
    187      1.1  mrg }
    188      1.1  mrg 
    189      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    190      1.1  mrg pandn_sh (int16x4_t s, int16x4_t t)
    191      1.1  mrg {
    192      1.1  mrg   return __builtin_loongson_pandn_sh (s, t);
    193      1.1  mrg }
    194      1.1  mrg 
    195      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    196      1.1  mrg pandn_sb (int8x8_t s, int8x8_t t)
    197      1.1  mrg {
    198      1.1  mrg   return __builtin_loongson_pandn_sb (s, t);
    199      1.1  mrg }
    200      1.1  mrg 
    201      1.1  mrg /* Average.  */
    202      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    203      1.1  mrg pavgh (uint16x4_t s, uint16x4_t t)
    204      1.1  mrg {
    205      1.1  mrg   return __builtin_loongson_pavgh (s, t);
    206      1.1  mrg }
    207      1.1  mrg 
    208      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    209      1.1  mrg pavgb (uint8x8_t s, uint8x8_t t)
    210      1.1  mrg {
    211      1.1  mrg   return __builtin_loongson_pavgb (s, t);
    212      1.1  mrg }
    213      1.1  mrg 
    214      1.1  mrg /* Equality test.  */
    215      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    216      1.1  mrg pcmpeqw_u (uint32x2_t s, uint32x2_t t)
    217      1.1  mrg {
    218      1.1  mrg   return __builtin_loongson_pcmpeqw_u (s, t);
    219      1.1  mrg }
    220      1.1  mrg 
    221      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    222      1.1  mrg pcmpeqh_u (uint16x4_t s, uint16x4_t t)
    223      1.1  mrg {
    224      1.1  mrg   return __builtin_loongson_pcmpeqh_u (s, t);
    225      1.1  mrg }
    226      1.1  mrg 
    227      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    228      1.1  mrg pcmpeqb_u (uint8x8_t s, uint8x8_t t)
    229      1.1  mrg {
    230      1.1  mrg   return __builtin_loongson_pcmpeqb_u (s, t);
    231      1.1  mrg }
    232      1.1  mrg 
    233      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    234      1.1  mrg pcmpeqw_s (int32x2_t s, int32x2_t t)
    235      1.1  mrg {
    236      1.1  mrg   return __builtin_loongson_pcmpeqw_s (s, t);
    237      1.1  mrg }
    238      1.1  mrg 
    239      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    240      1.1  mrg pcmpeqh_s (int16x4_t s, int16x4_t t)
    241      1.1  mrg {
    242      1.1  mrg   return __builtin_loongson_pcmpeqh_s (s, t);
    243      1.1  mrg }
    244      1.1  mrg 
    245      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    246      1.1  mrg pcmpeqb_s (int8x8_t s, int8x8_t t)
    247      1.1  mrg {
    248      1.1  mrg   return __builtin_loongson_pcmpeqb_s (s, t);
    249      1.1  mrg }
    250      1.1  mrg 
    251      1.1  mrg /* Greater-than test.  */
    252      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    253      1.1  mrg pcmpgtw_u (uint32x2_t s, uint32x2_t t)
    254      1.1  mrg {
    255      1.1  mrg   return __builtin_loongson_pcmpgtw_u (s, t);
    256      1.1  mrg }
    257      1.1  mrg 
    258      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    259      1.1  mrg pcmpgth_u (uint16x4_t s, uint16x4_t t)
    260      1.1  mrg {
    261      1.1  mrg   return __builtin_loongson_pcmpgth_u (s, t);
    262      1.1  mrg }
    263      1.1  mrg 
    264      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    265      1.1  mrg pcmpgtb_u (uint8x8_t s, uint8x8_t t)
    266      1.1  mrg {
    267      1.1  mrg   return __builtin_loongson_pcmpgtb_u (s, t);
    268      1.1  mrg }
    269      1.1  mrg 
    270      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    271      1.1  mrg pcmpgtw_s (int32x2_t s, int32x2_t t)
    272      1.1  mrg {
    273      1.1  mrg   return __builtin_loongson_pcmpgtw_s (s, t);
    274      1.1  mrg }
    275      1.1  mrg 
    276      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    277      1.1  mrg pcmpgth_s (int16x4_t s, int16x4_t t)
    278      1.1  mrg {
    279      1.1  mrg   return __builtin_loongson_pcmpgth_s (s, t);
    280      1.1  mrg }
    281      1.1  mrg 
    282      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    283      1.1  mrg pcmpgtb_s (int8x8_t s, int8x8_t t)
    284      1.1  mrg {
    285      1.1  mrg   return __builtin_loongson_pcmpgtb_s (s, t);
    286      1.1  mrg }
    287      1.1  mrg 
    288      1.1  mrg /* Extract halfword.  */
    289      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    290      1.1  mrg pextrh_u (uint16x4_t s, int field /* 0--3.  */)
    291      1.1  mrg {
    292      1.1  mrg   return __builtin_loongson_pextrh_u (s, field);
    293      1.1  mrg }
    294      1.1  mrg 
    295      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    296      1.1  mrg pextrh_s (int16x4_t s, int field /* 0--3.  */)
    297      1.1  mrg {
    298      1.1  mrg   return __builtin_loongson_pextrh_s (s, field);
    299      1.1  mrg }
    300      1.1  mrg 
    301      1.1  mrg /* Insert halfword.  */
    302      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    303      1.1  mrg pinsrh_0_u (uint16x4_t s, uint16x4_t t)
    304      1.1  mrg {
    305      1.1  mrg   return __builtin_loongson_pinsrh_0_u (s, t);
    306      1.1  mrg }
    307      1.1  mrg 
    308      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    309      1.1  mrg pinsrh_1_u (uint16x4_t s, uint16x4_t t)
    310      1.1  mrg {
    311      1.1  mrg   return __builtin_loongson_pinsrh_1_u (s, t);
    312      1.1  mrg }
    313      1.1  mrg 
    314      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    315      1.1  mrg pinsrh_2_u (uint16x4_t s, uint16x4_t t)
    316      1.1  mrg {
    317      1.1  mrg   return __builtin_loongson_pinsrh_2_u (s, t);
    318      1.1  mrg }
    319      1.1  mrg 
    320      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    321      1.1  mrg pinsrh_3_u (uint16x4_t s, uint16x4_t t)
    322      1.1  mrg {
    323      1.1  mrg   return __builtin_loongson_pinsrh_3_u (s, t);
    324      1.1  mrg }
    325      1.1  mrg 
    326      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    327      1.1  mrg pinsrh_0_s (int16x4_t s, int16x4_t t)
    328      1.1  mrg {
    329      1.1  mrg   return __builtin_loongson_pinsrh_0_s (s, t);
    330      1.1  mrg }
    331      1.1  mrg 
    332      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    333      1.1  mrg pinsrh_1_s (int16x4_t s, int16x4_t t)
    334      1.1  mrg {
    335      1.1  mrg   return __builtin_loongson_pinsrh_1_s (s, t);
    336      1.1  mrg }
    337      1.1  mrg 
    338      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    339      1.1  mrg pinsrh_2_s (int16x4_t s, int16x4_t t)
    340      1.1  mrg {
    341      1.1  mrg   return __builtin_loongson_pinsrh_2_s (s, t);
    342      1.1  mrg }
    343      1.1  mrg 
    344      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    345      1.1  mrg pinsrh_3_s (int16x4_t s, int16x4_t t)
    346      1.1  mrg {
    347      1.1  mrg   return __builtin_loongson_pinsrh_3_s (s, t);
    348      1.1  mrg }
    349      1.1  mrg 
    350      1.1  mrg /* Multiply and add.  */
    351      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    352      1.1  mrg pmaddhw (int16x4_t s, int16x4_t t)
    353      1.1  mrg {
    354      1.1  mrg   return __builtin_loongson_pmaddhw (s, t);
    355      1.1  mrg }
    356      1.1  mrg 
    357      1.1  mrg /* Maximum of signed halfwords.  */
    358      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    359      1.1  mrg pmaxsh (int16x4_t s, int16x4_t t)
    360      1.1  mrg {
    361      1.1  mrg   return __builtin_loongson_pmaxsh (s, t);
    362      1.1  mrg }
    363      1.1  mrg 
    364      1.1  mrg /* Maximum of unsigned bytes.  */
    365      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    366      1.1  mrg pmaxub (uint8x8_t s, uint8x8_t t)
    367      1.1  mrg {
    368      1.1  mrg   return __builtin_loongson_pmaxub (s, t);
    369      1.1  mrg }
    370      1.1  mrg 
    371      1.1  mrg /* Minimum of signed halfwords.  */
    372      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    373      1.1  mrg pminsh (int16x4_t s, int16x4_t t)
    374      1.1  mrg {
    375      1.1  mrg   return __builtin_loongson_pminsh (s, t);
    376      1.1  mrg }
    377      1.1  mrg 
    378      1.1  mrg /* Minimum of unsigned bytes.  */
    379      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    380      1.1  mrg pminub (uint8x8_t s, uint8x8_t t)
    381      1.1  mrg {
    382      1.1  mrg   return __builtin_loongson_pminub (s, t);
    383      1.1  mrg }
    384      1.1  mrg 
    385      1.1  mrg /* Move byte mask.  */
    386      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    387      1.1  mrg pmovmskb_u (uint8x8_t s)
    388      1.1  mrg {
    389      1.1  mrg   return __builtin_loongson_pmovmskb_u (s);
    390      1.1  mrg }
    391      1.1  mrg 
    392      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    393      1.1  mrg pmovmskb_s (int8x8_t s)
    394      1.1  mrg {
    395      1.1  mrg   return __builtin_loongson_pmovmskb_s (s);
    396      1.1  mrg }
    397      1.1  mrg 
    398      1.1  mrg /* Multiply unsigned integers and store high result.  */
    399      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    400      1.1  mrg pmulhuh (uint16x4_t s, uint16x4_t t)
    401      1.1  mrg {
    402      1.1  mrg   return __builtin_loongson_pmulhuh (s, t);
    403      1.1  mrg }
    404      1.1  mrg 
    405      1.1  mrg /* Multiply signed integers and store high result.  */
    406      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    407      1.1  mrg pmulhh (int16x4_t s, int16x4_t t)
    408      1.1  mrg {
    409      1.1  mrg   return __builtin_loongson_pmulhh (s, t);
    410      1.1  mrg }
    411      1.1  mrg 
    412      1.1  mrg /* Multiply signed integers and store low result.  */
    413      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    414      1.1  mrg pmullh (int16x4_t s, int16x4_t t)
    415      1.1  mrg {
    416      1.1  mrg   return __builtin_loongson_pmullh (s, t);
    417      1.1  mrg }
    418      1.1  mrg 
    419      1.1  mrg /* Multiply unsigned word integers.  */
    420      1.1  mrg __extension__ static __inline int64_t __attribute__ ((__always_inline__))
    421      1.1  mrg pmuluw (uint32x2_t s, uint32x2_t t)
    422      1.1  mrg {
    423      1.1  mrg   return __builtin_loongson_pmuluw (s, t);
    424      1.1  mrg }
    425      1.1  mrg 
    426      1.1  mrg /* Absolute difference.  */
    427      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    428      1.1  mrg pasubub (uint8x8_t s, uint8x8_t t)
    429      1.1  mrg {
    430      1.1  mrg   return __builtin_loongson_pasubub (s, t);
    431      1.1  mrg }
    432      1.1  mrg 
    433      1.1  mrg /* Sum of unsigned byte integers.  */
    434      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    435      1.1  mrg biadd (uint8x8_t s)
    436      1.1  mrg {
    437      1.1  mrg   return __builtin_loongson_biadd (s);
    438      1.1  mrg }
    439      1.1  mrg 
    440      1.1  mrg /* Sum of absolute differences.
    441      1.1  mrg    Note that this intrinsic expands into two machine instructions:
    442      1.1  mrg    PASUBUB followed by BIADD.  */
    443      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    444      1.1  mrg psadbh (uint8x8_t s, uint8x8_t t)
    445      1.1  mrg {
    446      1.1  mrg   return __builtin_loongson_psadbh (s, t);
    447      1.1  mrg }
    448      1.1  mrg 
    449      1.1  mrg /* Shuffle halfwords.  */
    450      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    451      1.1  mrg pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
    452      1.1  mrg {
    453      1.1  mrg   return __builtin_loongson_pshufh_u (s, order);
    454      1.1  mrg }
    455      1.1  mrg 
    456      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    457      1.1  mrg pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
    458      1.1  mrg {
    459      1.1  mrg   return __builtin_loongson_pshufh_s (s, order);
    460      1.1  mrg }
    461      1.1  mrg 
    462      1.1  mrg /* Shift left logical.  */
    463      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    464      1.1  mrg psllh_u (uint16x4_t s, uint8_t amount)
    465      1.1  mrg {
    466      1.1  mrg   return __builtin_loongson_psllh_u (s, amount);
    467      1.1  mrg }
    468      1.1  mrg 
    469      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    470      1.1  mrg psllh_s (int16x4_t s, uint8_t amount)
    471      1.1  mrg {
    472      1.1  mrg   return __builtin_loongson_psllh_s (s, amount);
    473      1.1  mrg }
    474      1.1  mrg 
    475      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    476      1.1  mrg psllw_u (uint32x2_t s, uint8_t amount)
    477      1.1  mrg {
    478      1.1  mrg   return __builtin_loongson_psllw_u (s, amount);
    479      1.1  mrg }
    480      1.1  mrg 
    481      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    482      1.1  mrg psllw_s (int32x2_t s, uint8_t amount)
    483      1.1  mrg {
    484      1.1  mrg   return __builtin_loongson_psllw_s (s, amount);
    485      1.1  mrg }
    486      1.1  mrg 
    487      1.1  mrg /* Shift right logical.  */
    488      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    489      1.1  mrg psrlh_u (uint16x4_t s, uint8_t amount)
    490      1.1  mrg {
    491      1.1  mrg   return __builtin_loongson_psrlh_u (s, amount);
    492      1.1  mrg }
    493      1.1  mrg 
    494      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    495      1.1  mrg psrlh_s (int16x4_t s, uint8_t amount)
    496      1.1  mrg {
    497      1.1  mrg   return __builtin_loongson_psrlh_s (s, amount);
    498      1.1  mrg }
    499      1.1  mrg 
    500      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    501      1.1  mrg psrlw_u (uint32x2_t s, uint8_t amount)
    502      1.1  mrg {
    503      1.1  mrg   return __builtin_loongson_psrlw_u (s, amount);
    504      1.1  mrg }
    505      1.1  mrg 
    506      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    507      1.1  mrg psrlw_s (int32x2_t s, uint8_t amount)
    508      1.1  mrg {
    509      1.1  mrg   return __builtin_loongson_psrlw_s (s, amount);
    510      1.1  mrg }
    511      1.1  mrg 
    512      1.1  mrg /* Shift right arithmetic.  */
    513      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    514      1.1  mrg psrah_u (uint16x4_t s, uint8_t amount)
    515      1.1  mrg {
    516      1.1  mrg   return __builtin_loongson_psrah_u (s, amount);
    517      1.1  mrg }
    518      1.1  mrg 
    519      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    520      1.1  mrg psrah_s (int16x4_t s, uint8_t amount)
    521      1.1  mrg {
    522      1.1  mrg   return __builtin_loongson_psrah_s (s, amount);
    523      1.1  mrg }
    524      1.1  mrg 
    525      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    526      1.1  mrg psraw_u (uint32x2_t s, uint8_t amount)
    527      1.1  mrg {
    528      1.1  mrg   return __builtin_loongson_psraw_u (s, amount);
    529      1.1  mrg }
    530      1.1  mrg 
    531      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    532      1.1  mrg psraw_s (int32x2_t s, uint8_t amount)
    533      1.1  mrg {
    534      1.1  mrg   return __builtin_loongson_psraw_s (s, amount);
    535      1.1  mrg }
    536      1.1  mrg 
    537      1.1  mrg /* Vector subtraction, treating overflow by wraparound.  */
    538      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    539      1.1  mrg psubw_u (uint32x2_t s, uint32x2_t t)
    540      1.1  mrg {
    541      1.1  mrg   return __builtin_loongson_psubw_u (s, t);
    542      1.1  mrg }
    543      1.1  mrg 
    544      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    545      1.1  mrg psubh_u (uint16x4_t s, uint16x4_t t)
    546      1.1  mrg {
    547      1.1  mrg   return __builtin_loongson_psubh_u (s, t);
    548      1.1  mrg }
    549      1.1  mrg 
    550      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    551      1.1  mrg psubb_u (uint8x8_t s, uint8x8_t t)
    552      1.1  mrg {
    553      1.1  mrg   return __builtin_loongson_psubb_u (s, t);
    554      1.1  mrg }
    555      1.1  mrg 
    556      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    557      1.1  mrg psubw_s (int32x2_t s, int32x2_t t)
    558      1.1  mrg {
    559      1.1  mrg   return __builtin_loongson_psubw_s (s, t);
    560      1.1  mrg }
    561      1.1  mrg 
    562      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    563      1.1  mrg psubh_s (int16x4_t s, int16x4_t t)
    564      1.1  mrg {
    565      1.1  mrg   return __builtin_loongson_psubh_s (s, t);
    566      1.1  mrg }
    567      1.1  mrg 
    568      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    569      1.1  mrg psubb_s (int8x8_t s, int8x8_t t)
    570      1.1  mrg {
    571      1.1  mrg   return __builtin_loongson_psubb_s (s, t);
    572      1.1  mrg }
    573      1.1  mrg 
    574      1.1  mrg /* Subtraction of doubleword integers, treating overflow by wraparound.  */
/* Wraparound subtraction of uint64_t doublewords; S and T are forwarded
   in that order to __builtin_loongson_psubd_u.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
psubd_u (uint64_t s, uint64_t t)
{
  const uint64_t difference = __builtin_loongson_psubd_u (s, t);

  return difference;
}
    580      1.1  mrg 
/* Wraparound subtraction of int64_t doublewords; S and T are forwarded
   in that order to __builtin_loongson_psubd_s.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
psubd_s (int64_t s, int64_t t)
{
  const int64_t difference = __builtin_loongson_psubd_s (s, t);

  return difference;
}
    586      1.1  mrg 
    587      1.1  mrg /* Vector subtraction, treating overflow by signed saturation.  */
    588      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    589      1.1  mrg psubsh (int16x4_t s, int16x4_t t)
    590      1.1  mrg {
    591      1.1  mrg   return __builtin_loongson_psubsh (s, t);
    592      1.1  mrg }
    593      1.1  mrg 
    594      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    595      1.1  mrg psubsb (int8x8_t s, int8x8_t t)
    596      1.1  mrg {
    597      1.1  mrg   return __builtin_loongson_psubsb (s, t);
    598      1.1  mrg }
    599      1.1  mrg 
    600      1.1  mrg /* Vector subtraction, treating overflow by unsigned saturation.  */
    601      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    602      1.1  mrg psubush (uint16x4_t s, uint16x4_t t)
    603      1.1  mrg {
    604      1.1  mrg   return __builtin_loongson_psubush (s, t);
    605      1.1  mrg }
    606      1.1  mrg 
    607      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    608      1.1  mrg psubusb (uint8x8_t s, uint8x8_t t)
    609      1.1  mrg {
    610      1.1  mrg   return __builtin_loongson_psubusb (s, t);
    611      1.1  mrg }
    612      1.1  mrg 
    613      1.1  mrg /* Unpack high data.  */
    614      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    615      1.1  mrg punpckhwd_u (uint32x2_t s, uint32x2_t t)
    616      1.1  mrg {
    617      1.1  mrg   return __builtin_loongson_punpckhwd_u (s, t);
    618      1.1  mrg }
    619      1.1  mrg 
    620      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    621      1.1  mrg punpckhhw_u (uint16x4_t s, uint16x4_t t)
    622      1.1  mrg {
    623      1.1  mrg   return __builtin_loongson_punpckhhw_u (s, t);
    624      1.1  mrg }
    625      1.1  mrg 
    626      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    627      1.1  mrg punpckhbh_u (uint8x8_t s, uint8x8_t t)
    628      1.1  mrg {
    629      1.1  mrg   return __builtin_loongson_punpckhbh_u (s, t);
    630      1.1  mrg }
    631      1.1  mrg 
    632      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    633      1.1  mrg punpckhwd_s (int32x2_t s, int32x2_t t)
    634      1.1  mrg {
    635      1.1  mrg   return __builtin_loongson_punpckhwd_s (s, t);
    636      1.1  mrg }
    637      1.1  mrg 
    638      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    639      1.1  mrg punpckhhw_s (int16x4_t s, int16x4_t t)
    640      1.1  mrg {
    641      1.1  mrg   return __builtin_loongson_punpckhhw_s (s, t);
    642      1.1  mrg }
    643      1.1  mrg 
    644      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    645      1.1  mrg punpckhbh_s (int8x8_t s, int8x8_t t)
    646      1.1  mrg {
    647      1.1  mrg   return __builtin_loongson_punpckhbh_s (s, t);
    648      1.1  mrg }
    649      1.1  mrg 
    650      1.1  mrg /* Unpack low data.  */
    651      1.1  mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
    652      1.1  mrg punpcklwd_u (uint32x2_t s, uint32x2_t t)
    653      1.1  mrg {
    654      1.1  mrg   return __builtin_loongson_punpcklwd_u (s, t);
    655      1.1  mrg }
    656      1.1  mrg 
    657      1.1  mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
    658      1.1  mrg punpcklhw_u (uint16x4_t s, uint16x4_t t)
    659      1.1  mrg {
    660      1.1  mrg   return __builtin_loongson_punpcklhw_u (s, t);
    661      1.1  mrg }
    662      1.1  mrg 
    663      1.1  mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
    664      1.1  mrg punpcklbh_u (uint8x8_t s, uint8x8_t t)
    665      1.1  mrg {
    666      1.1  mrg   return __builtin_loongson_punpcklbh_u (s, t);
    667      1.1  mrg }
    668      1.1  mrg 
    669      1.1  mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
    670      1.1  mrg punpcklwd_s (int32x2_t s, int32x2_t t)
    671      1.1  mrg {
    672      1.1  mrg   return __builtin_loongson_punpcklwd_s (s, t);
    673      1.1  mrg }
    674      1.1  mrg 
    675      1.1  mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
    676      1.1  mrg punpcklhw_s (int16x4_t s, int16x4_t t)
    677      1.1  mrg {
    678      1.1  mrg   return __builtin_loongson_punpcklhw_s (s, t);
    679      1.1  mrg }
    680      1.1  mrg 
    681      1.1  mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
    682      1.1  mrg punpcklbh_s (int8x8_t s, int8x8_t t)
    683      1.1  mrg {
    684      1.1  mrg   return __builtin_loongson_punpcklbh_s (s, t);
    685      1.1  mrg }
    686      1.1  mrg 
    687      1.1  mrg #ifdef __cplusplus
    688      1.1  mrg }
    689      1.1  mrg #endif
    690      1.1  mrg 
    691      1.1  mrg #endif
    692