1 1.1 mrg /* Intrinsics for Loongson MultiMedia extension Instructions operations. 2 1.1 mrg 3 1.1.1.3 mrg Copyright (C) 2008-2022 Free Software Foundation, Inc. 4 1.1 mrg Contributed by CodeSourcery. 5 1.1 mrg 6 1.1 mrg This file is part of GCC. 7 1.1 mrg 8 1.1 mrg GCC is free software; you can redistribute it and/or modify it 9 1.1 mrg under the terms of the GNU General Public License as published 10 1.1 mrg by the Free Software Foundation; either version 3, or (at your 11 1.1 mrg option) any later version. 12 1.1 mrg 13 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 14 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 16 1.1 mrg License for more details. 17 1.1 mrg 18 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 19 1.1 mrg permissions described in the GCC Runtime Library Exception, version 20 1.1 mrg 3.1, as published by the Free Software Foundation. 21 1.1 mrg 22 1.1 mrg You should have received a copy of the GNU General Public License and 23 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 24 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 25 1.1 mrg <http://www.gnu.org/licenses/>. */ 26 1.1 mrg 27 1.1 mrg #ifndef _GCC_LOONGSON_MMIINTRIN_H 28 1.1 mrg #define _GCC_LOONGSON_MMIINTRIN_H 29 1.1 mrg 30 1.1 mrg #if !defined(__mips_loongson_mmi) 31 1.1 mrg # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\ 32 1.1 mrg loongson-mmiintrin.h 33 1.1 mrg #endif 34 1.1 mrg 35 1.1 mrg #ifdef __cplusplus 36 1.1 mrg extern "C" { 37 1.1 mrg #endif 38 1.1 mrg 39 1.1 mrg #include <stdint.h> 40 1.1 mrg 41 1.1 mrg /* Vectors of unsigned bytes, halfwords and words. */ 42 1.1 mrg typedef uint8_t uint8x8_t __attribute__((vector_size (8))); 43 1.1 mrg typedef uint16_t uint16x4_t __attribute__((vector_size (8))); 44 1.1 mrg typedef uint32_t uint32x2_t __attribute__((vector_size (8))); 45 1.1 mrg 46 1.1 mrg /* Vectors of signed bytes, halfwords and words. */ 47 1.1 mrg typedef int8_t int8x8_t __attribute__((vector_size (8))); 48 1.1 mrg typedef int16_t int16x4_t __attribute__((vector_size (8))); 49 1.1 mrg typedef int32_t int32x2_t __attribute__((vector_size (8))); 50 1.1 mrg 51 1.1 mrg /* SIMD intrinsics. 52 1.1 mrg Unless otherwise noted, calls to the functions below will expand into 53 1.1 mrg precisely one machine instruction, modulo any moves required to 54 1.1 mrg satisfy register allocation constraints. */ 55 1.1 mrg 56 1.1 mrg /* Pack with signed saturation. */ 57 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 58 1.1 mrg packsswh (int32x2_t s, int32x2_t t) 59 1.1 mrg { 60 1.1 mrg return __builtin_loongson_packsswh (s, t); 61 1.1 mrg } 62 1.1 mrg 63 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 64 1.1 mrg packsshb (int16x4_t s, int16x4_t t) 65 1.1 mrg { 66 1.1 mrg return __builtin_loongson_packsshb (s, t); 67 1.1 mrg } 68 1.1 mrg 69 1.1 mrg /* Pack with unsigned saturation. */ 70 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 71 1.1 mrg packushb (uint16x4_t s, uint16x4_t t) 72 1.1 mrg { 73 1.1 mrg return __builtin_loongson_packushb (s, t); 74 1.1 mrg } 75 1.1 mrg 76 1.1 mrg /* Vector addition, treating overflow by wraparound. */ 77 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 78 1.1 mrg paddw_u (uint32x2_t s, uint32x2_t t) 79 1.1 mrg { 80 1.1 mrg return __builtin_loongson_paddw_u (s, t); 81 1.1 mrg } 82 1.1 mrg 83 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 84 1.1 mrg paddh_u (uint16x4_t s, uint16x4_t t) 85 1.1 mrg { 86 1.1 mrg return __builtin_loongson_paddh_u (s, t); 87 1.1 mrg } 88 1.1 mrg 89 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 90 1.1 mrg paddb_u (uint8x8_t s, uint8x8_t t) 91 1.1 mrg { 92 1.1 mrg return __builtin_loongson_paddb_u (s, t); 93 1.1 mrg } 94 1.1 mrg 95 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 96 1.1 mrg paddw_s (int32x2_t s, int32x2_t t) 97 1.1 mrg { 98 1.1 mrg return __builtin_loongson_paddw_s (s, t); 99 1.1 mrg } 100 1.1 mrg 101 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 102 1.1 mrg paddh_s (int16x4_t s, int16x4_t t) 103 1.1 mrg { 104 1.1 mrg return __builtin_loongson_paddh_s (s, t); 105 1.1 mrg } 106 1.1 mrg 107 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 108 1.1 mrg paddb_s (int8x8_t s, int8x8_t t) 109 1.1 mrg { 110 1.1 mrg return __builtin_loongson_paddb_s (s, t); 111 1.1 mrg } 112 1.1 mrg 113 1.1 mrg /* Addition of doubleword integers, treating overflow by wraparound. */ 114 1.1 mrg __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 115 1.1 mrg paddd_u (uint64_t s, uint64_t t) 116 1.1 mrg { 117 1.1 mrg return __builtin_loongson_paddd_u (s, t); 118 1.1 mrg } 119 1.1 mrg 120 1.1 mrg __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 121 1.1 mrg paddd_s (int64_t s, int64_t t) 122 1.1 mrg { 123 1.1 mrg return __builtin_loongson_paddd_s (s, t); 124 1.1 mrg } 125 1.1 mrg 126 1.1 mrg /* Vector addition, treating overflow by signed saturation. */ 127 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 128 1.1 mrg paddsh (int16x4_t s, int16x4_t t) 129 1.1 mrg { 130 1.1 mrg return __builtin_loongson_paddsh (s, t); 131 1.1 mrg } 132 1.1 mrg 133 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 134 1.1 mrg paddsb (int8x8_t s, int8x8_t t) 135 1.1 mrg { 136 1.1 mrg return __builtin_loongson_paddsb (s, t); 137 1.1 mrg } 138 1.1 mrg 139 1.1 mrg /* Vector addition, treating overflow by unsigned saturation. */ 140 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 141 1.1 mrg paddush (uint16x4_t s, uint16x4_t t) 142 1.1 mrg { 143 1.1 mrg return __builtin_loongson_paddush (s, t); 144 1.1 mrg } 145 1.1 mrg 146 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 147 1.1 mrg paddusb (uint8x8_t s, uint8x8_t t) 148 1.1 mrg { 149 1.1 mrg return __builtin_loongson_paddusb (s, t); 150 1.1 mrg } 151 1.1 mrg 152 1.1 mrg /* Logical AND NOT. */ 153 1.1 mrg __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 154 1.1 mrg pandn_ud (uint64_t s, uint64_t t) 155 1.1 mrg { 156 1.1 mrg return __builtin_loongson_pandn_ud (s, t); 157 1.1 mrg } 158 1.1 mrg 159 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 160 1.1 mrg pandn_uw (uint32x2_t s, uint32x2_t t) 161 1.1 mrg { 162 1.1 mrg return __builtin_loongson_pandn_uw (s, t); 163 1.1 mrg } 164 1.1 mrg 165 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 166 1.1 mrg pandn_uh (uint16x4_t s, uint16x4_t t) 167 1.1 mrg { 168 1.1 mrg return __builtin_loongson_pandn_uh (s, t); 169 1.1 mrg } 170 1.1 mrg 171 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 172 1.1 mrg pandn_ub (uint8x8_t s, uint8x8_t t) 173 1.1 mrg { 174 1.1 mrg return __builtin_loongson_pandn_ub (s, t); 175 1.1 mrg } 176 1.1 mrg 177 1.1 mrg __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 178 1.1 mrg pandn_sd (int64_t s, int64_t t) 179 1.1 mrg { 180 1.1 mrg return __builtin_loongson_pandn_sd (s, t); 181 1.1 mrg } 182 1.1 mrg 183 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 184 1.1 mrg pandn_sw (int32x2_t s, int32x2_t t) 185 1.1 mrg { 186 1.1 mrg return __builtin_loongson_pandn_sw (s, t); 187 1.1 mrg } 188 1.1 mrg 189 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 190 1.1 mrg pandn_sh (int16x4_t s, int16x4_t t) 191 1.1 mrg { 192 1.1 mrg return __builtin_loongson_pandn_sh (s, t); 193 1.1 mrg } 194 1.1 mrg 195 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 196 1.1 mrg pandn_sb (int8x8_t s, int8x8_t t) 197 1.1 mrg { 198 1.1 mrg return __builtin_loongson_pandn_sb (s, t); 199 1.1 mrg } 200 1.1 mrg 201 1.1 mrg /* Average. */ 202 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 203 1.1 mrg pavgh (uint16x4_t s, uint16x4_t t) 204 1.1 mrg { 205 1.1 mrg return __builtin_loongson_pavgh (s, t); 206 1.1 mrg } 207 1.1 mrg 208 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 209 1.1 mrg pavgb (uint8x8_t s, uint8x8_t t) 210 1.1 mrg { 211 1.1 mrg return __builtin_loongson_pavgb (s, t); 212 1.1 mrg } 213 1.1 mrg 214 1.1 mrg /* Equality test. */ 215 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 216 1.1 mrg pcmpeqw_u (uint32x2_t s, uint32x2_t t) 217 1.1 mrg { 218 1.1 mrg return __builtin_loongson_pcmpeqw_u (s, t); 219 1.1 mrg } 220 1.1 mrg 221 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 222 1.1 mrg pcmpeqh_u (uint16x4_t s, uint16x4_t t) 223 1.1 mrg { 224 1.1 mrg return __builtin_loongson_pcmpeqh_u (s, t); 225 1.1 mrg } 226 1.1 mrg 227 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 228 1.1 mrg pcmpeqb_u (uint8x8_t s, uint8x8_t t) 229 1.1 mrg { 230 1.1 mrg return __builtin_loongson_pcmpeqb_u (s, t); 231 1.1 mrg } 232 1.1 mrg 233 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 234 1.1 mrg pcmpeqw_s (int32x2_t s, int32x2_t t) 235 1.1 mrg { 236 1.1 mrg return __builtin_loongson_pcmpeqw_s (s, t); 237 1.1 mrg } 238 1.1 mrg 239 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 240 1.1 mrg pcmpeqh_s (int16x4_t s, int16x4_t t) 241 1.1 mrg { 242 1.1 mrg return __builtin_loongson_pcmpeqh_s (s, t); 243 1.1 mrg } 244 1.1 mrg 245 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 246 1.1 mrg pcmpeqb_s (int8x8_t s, int8x8_t t) 247 1.1 mrg { 248 1.1 mrg return __builtin_loongson_pcmpeqb_s (s, t); 249 1.1 mrg } 250 1.1 mrg 251 1.1 mrg /* Greater-than test. */ 252 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 253 1.1 mrg pcmpgtw_u (uint32x2_t s, uint32x2_t t) 254 1.1 mrg { 255 1.1 mrg return __builtin_loongson_pcmpgtw_u (s, t); 256 1.1 mrg } 257 1.1 mrg 258 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 259 1.1 mrg pcmpgth_u (uint16x4_t s, uint16x4_t t) 260 1.1 mrg { 261 1.1 mrg return __builtin_loongson_pcmpgth_u (s, t); 262 1.1 mrg } 263 1.1 mrg 264 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 265 1.1 mrg pcmpgtb_u (uint8x8_t s, uint8x8_t t) 266 1.1 mrg { 267 1.1 mrg return __builtin_loongson_pcmpgtb_u (s, t); 268 1.1 mrg } 269 1.1 mrg 270 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 271 1.1 mrg pcmpgtw_s (int32x2_t s, int32x2_t t) 272 1.1 mrg { 273 1.1 mrg return __builtin_loongson_pcmpgtw_s (s, t); 274 1.1 mrg } 275 1.1 mrg 276 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 277 1.1 mrg pcmpgth_s (int16x4_t s, int16x4_t t) 278 1.1 mrg { 279 1.1 mrg return __builtin_loongson_pcmpgth_s (s, t); 280 1.1 mrg } 281 1.1 mrg 282 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 283 1.1 mrg pcmpgtb_s (int8x8_t s, int8x8_t t) 284 1.1 mrg { 285 1.1 mrg return __builtin_loongson_pcmpgtb_s (s, t); 286 1.1 mrg } 287 1.1 mrg 288 1.1 mrg /* Extract halfword. */ 289 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 290 1.1 mrg pextrh_u (uint16x4_t s, int field /* 0--3. */) 291 1.1 mrg { 292 1.1 mrg return __builtin_loongson_pextrh_u (s, field); 293 1.1 mrg } 294 1.1 mrg 295 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 296 1.1 mrg pextrh_s (int16x4_t s, int field /* 0--3. */) 297 1.1 mrg { 298 1.1 mrg return __builtin_loongson_pextrh_s (s, field); 299 1.1 mrg } 300 1.1 mrg 301 1.1 mrg /* Insert halfword. */ 302 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 303 1.1 mrg pinsrh_0_u (uint16x4_t s, uint16x4_t t) 304 1.1 mrg { 305 1.1 mrg return __builtin_loongson_pinsrh_0_u (s, t); 306 1.1 mrg } 307 1.1 mrg 308 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 309 1.1 mrg pinsrh_1_u (uint16x4_t s, uint16x4_t t) 310 1.1 mrg { 311 1.1 mrg return __builtin_loongson_pinsrh_1_u (s, t); 312 1.1 mrg } 313 1.1 mrg 314 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 315 1.1 mrg pinsrh_2_u (uint16x4_t s, uint16x4_t t) 316 1.1 mrg { 317 1.1 mrg return __builtin_loongson_pinsrh_2_u (s, t); 318 1.1 mrg } 319 1.1 mrg 320 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 321 1.1 mrg pinsrh_3_u (uint16x4_t s, uint16x4_t t) 322 1.1 mrg { 323 1.1 mrg return __builtin_loongson_pinsrh_3_u (s, t); 324 1.1 mrg } 325 1.1 mrg 326 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 327 1.1 mrg pinsrh_0_s (int16x4_t s, int16x4_t t) 328 1.1 mrg { 329 1.1 mrg return __builtin_loongson_pinsrh_0_s (s, t); 330 1.1 mrg } 331 1.1 mrg 332 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 333 1.1 mrg pinsrh_1_s (int16x4_t s, int16x4_t t) 334 1.1 mrg { 335 1.1 mrg return __builtin_loongson_pinsrh_1_s (s, t); 336 1.1 mrg } 337 1.1 mrg 338 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 339 1.1 mrg pinsrh_2_s (int16x4_t s, int16x4_t t) 340 1.1 mrg { 341 1.1 mrg return __builtin_loongson_pinsrh_2_s (s, t); 342 1.1 mrg } 343 1.1 mrg 344 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 345 1.1 mrg pinsrh_3_s (int16x4_t s, int16x4_t t) 346 1.1 mrg { 347 1.1 mrg return __builtin_loongson_pinsrh_3_s (s, t); 348 1.1 mrg } 349 1.1 mrg 350 1.1 mrg /* Multiply and add. */ 351 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 352 1.1 mrg pmaddhw (int16x4_t s, int16x4_t t) 353 1.1 mrg { 354 1.1 mrg return __builtin_loongson_pmaddhw (s, t); 355 1.1 mrg } 356 1.1 mrg 357 1.1 mrg /* Maximum of signed halfwords. */ 358 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 359 1.1 mrg pmaxsh (int16x4_t s, int16x4_t t) 360 1.1 mrg { 361 1.1 mrg return __builtin_loongson_pmaxsh (s, t); 362 1.1 mrg } 363 1.1 mrg 364 1.1 mrg /* Maximum of unsigned bytes. */ 365 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 366 1.1 mrg pmaxub (uint8x8_t s, uint8x8_t t) 367 1.1 mrg { 368 1.1 mrg return __builtin_loongson_pmaxub (s, t); 369 1.1 mrg } 370 1.1 mrg 371 1.1 mrg /* Minimum of signed halfwords. */ 372 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 373 1.1 mrg pminsh (int16x4_t s, int16x4_t t) 374 1.1 mrg { 375 1.1 mrg return __builtin_loongson_pminsh (s, t); 376 1.1 mrg } 377 1.1 mrg 378 1.1 mrg /* Minimum of unsigned bytes. */ 379 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 380 1.1 mrg pminub (uint8x8_t s, uint8x8_t t) 381 1.1 mrg { 382 1.1 mrg return __builtin_loongson_pminub (s, t); 383 1.1 mrg } 384 1.1 mrg 385 1.1 mrg /* Move byte mask. */ 386 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 387 1.1 mrg pmovmskb_u (uint8x8_t s) 388 1.1 mrg { 389 1.1 mrg return __builtin_loongson_pmovmskb_u (s); 390 1.1 mrg } 391 1.1 mrg 392 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 393 1.1 mrg pmovmskb_s (int8x8_t s) 394 1.1 mrg { 395 1.1 mrg return __builtin_loongson_pmovmskb_s (s); 396 1.1 mrg } 397 1.1 mrg 398 1.1 mrg /* Multiply unsigned integers and store high result. */ 399 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 400 1.1 mrg pmulhuh (uint16x4_t s, uint16x4_t t) 401 1.1 mrg { 402 1.1 mrg return __builtin_loongson_pmulhuh (s, t); 403 1.1 mrg } 404 1.1 mrg 405 1.1 mrg /* Multiply signed integers and store high result. */ 406 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 407 1.1 mrg pmulhh (int16x4_t s, int16x4_t t) 408 1.1 mrg { 409 1.1 mrg return __builtin_loongson_pmulhh (s, t); 410 1.1 mrg } 411 1.1 mrg 412 1.1 mrg /* Multiply signed integers and store low result. */ 413 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 414 1.1 mrg pmullh (int16x4_t s, int16x4_t t) 415 1.1 mrg { 416 1.1 mrg return __builtin_loongson_pmullh (s, t); 417 1.1 mrg } 418 1.1 mrg 419 1.1 mrg /* Multiply unsigned word integers. */ 420 1.1 mrg __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 421 1.1 mrg pmuluw (uint32x2_t s, uint32x2_t t) 422 1.1 mrg { 423 1.1 mrg return __builtin_loongson_pmuluw (s, t); 424 1.1 mrg } 425 1.1 mrg 426 1.1 mrg /* Absolute difference. */ 427 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 428 1.1 mrg pasubub (uint8x8_t s, uint8x8_t t) 429 1.1 mrg { 430 1.1 mrg return __builtin_loongson_pasubub (s, t); 431 1.1 mrg } 432 1.1 mrg 433 1.1 mrg /* Sum of unsigned byte integers. */ 434 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 435 1.1 mrg biadd (uint8x8_t s) 436 1.1 mrg { 437 1.1 mrg return __builtin_loongson_biadd (s); 438 1.1 mrg } 439 1.1 mrg 440 1.1 mrg /* Sum of absolute differences. 441 1.1 mrg Note that this intrinsic expands into two machine instructions: 442 1.1 mrg PASUBUB followed by BIADD. */ 443 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 444 1.1 mrg psadbh (uint8x8_t s, uint8x8_t t) 445 1.1 mrg { 446 1.1 mrg return __builtin_loongson_psadbh (s, t); 447 1.1 mrg } 448 1.1 mrg 449 1.1 mrg /* Shuffle halfwords. */ 450 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 451 1.1 mrg pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order) 452 1.1 mrg { 453 1.1 mrg return __builtin_loongson_pshufh_u (s, order); 454 1.1 mrg } 455 1.1 mrg 456 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 457 1.1 mrg pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order) 458 1.1 mrg { 459 1.1 mrg return __builtin_loongson_pshufh_s (s, order); 460 1.1 mrg } 461 1.1 mrg 462 1.1 mrg /* Shift left logical. */ 463 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 464 1.1 mrg psllh_u (uint16x4_t s, uint8_t amount) 465 1.1 mrg { 466 1.1 mrg return __builtin_loongson_psllh_u (s, amount); 467 1.1 mrg } 468 1.1 mrg 469 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 470 1.1 mrg psllh_s (int16x4_t s, uint8_t amount) 471 1.1 mrg { 472 1.1 mrg return __builtin_loongson_psllh_s (s, amount); 473 1.1 mrg } 474 1.1 mrg 475 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 476 1.1 mrg psllw_u (uint32x2_t s, uint8_t amount) 477 1.1 mrg { 478 1.1 mrg return __builtin_loongson_psllw_u (s, amount); 479 1.1 mrg } 480 1.1 mrg 481 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 482 1.1 mrg psllw_s (int32x2_t s, uint8_t amount) 483 1.1 mrg { 484 1.1 mrg return __builtin_loongson_psllw_s (s, amount); 485 1.1 mrg } 486 1.1 mrg 487 1.1 mrg /* Shift right logical. */ 488 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 489 1.1 mrg psrlh_u (uint16x4_t s, uint8_t amount) 490 1.1 mrg { 491 1.1 mrg return __builtin_loongson_psrlh_u (s, amount); 492 1.1 mrg } 493 1.1 mrg 494 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 495 1.1 mrg psrlh_s (int16x4_t s, uint8_t amount) 496 1.1 mrg { 497 1.1 mrg return __builtin_loongson_psrlh_s (s, amount); 498 1.1 mrg } 499 1.1 mrg 500 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 501 1.1 mrg psrlw_u (uint32x2_t s, uint8_t amount) 502 1.1 mrg { 503 1.1 mrg return __builtin_loongson_psrlw_u (s, amount); 504 1.1 mrg } 505 1.1 mrg 506 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 507 1.1 mrg psrlw_s (int32x2_t s, uint8_t amount) 508 1.1 mrg { 509 1.1 mrg return __builtin_loongson_psrlw_s (s, amount); 510 1.1 mrg } 511 1.1 mrg 512 1.1 mrg /* Shift right arithmetic. */ 513 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 514 1.1 mrg psrah_u (uint16x4_t s, uint8_t amount) 515 1.1 mrg { 516 1.1 mrg return __builtin_loongson_psrah_u (s, amount); 517 1.1 mrg } 518 1.1 mrg 519 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 520 1.1 mrg psrah_s (int16x4_t s, uint8_t amount) 521 1.1 mrg { 522 1.1 mrg return __builtin_loongson_psrah_s (s, amount); 523 1.1 mrg } 524 1.1 mrg 525 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 526 1.1 mrg psraw_u (uint32x2_t s, uint8_t amount) 527 1.1 mrg { 528 1.1 mrg return __builtin_loongson_psraw_u (s, amount); 529 1.1 mrg } 530 1.1 mrg 531 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 532 1.1 mrg psraw_s (int32x2_t s, uint8_t amount) 533 1.1 mrg { 534 1.1 mrg return __builtin_loongson_psraw_s (s, amount); 535 1.1 mrg } 536 1.1 mrg 537 1.1 mrg /* Vector subtraction, treating overflow by wraparound. */ 538 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 539 1.1 mrg psubw_u (uint32x2_t s, uint32x2_t t) 540 1.1 mrg { 541 1.1 mrg return __builtin_loongson_psubw_u (s, t); 542 1.1 mrg } 543 1.1 mrg 544 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 545 1.1 mrg psubh_u (uint16x4_t s, uint16x4_t t) 546 1.1 mrg { 547 1.1 mrg return __builtin_loongson_psubh_u (s, t); 548 1.1 mrg } 549 1.1 mrg 550 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 551 1.1 mrg psubb_u (uint8x8_t s, uint8x8_t t) 552 1.1 mrg { 553 1.1 mrg return __builtin_loongson_psubb_u (s, t); 554 1.1 mrg } 555 1.1 mrg 556 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 557 1.1 mrg psubw_s (int32x2_t s, int32x2_t t) 558 1.1 mrg { 559 1.1 mrg return __builtin_loongson_psubw_s (s, t); 560 1.1 mrg } 561 1.1 mrg 562 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 563 1.1 mrg psubh_s (int16x4_t s, int16x4_t t) 564 1.1 mrg { 565 1.1 mrg return __builtin_loongson_psubh_s (s, t); 566 1.1 mrg } 567 1.1 mrg 568 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 569 1.1 mrg psubb_s (int8x8_t s, int8x8_t t) 570 1.1 mrg { 571 1.1 mrg return __builtin_loongson_psubb_s (s, t); 572 1.1 mrg } 573 1.1 mrg 574 1.1 mrg /* Subtraction of doubleword integers, treating overflow by wraparound. */ 575 1.1 mrg __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) 576 1.1 mrg psubd_u (uint64_t s, uint64_t t) 577 1.1 mrg { 578 1.1 mrg return __builtin_loongson_psubd_u (s, t); 579 1.1 mrg } 580 1.1 mrg 581 1.1 mrg __extension__ static __inline int64_t __attribute__ ((__always_inline__)) 582 1.1 mrg psubd_s (int64_t s, int64_t t) 583 1.1 mrg { 584 1.1 mrg return __builtin_loongson_psubd_s (s, t); 585 1.1 mrg } 586 1.1 mrg 587 1.1 mrg /* Vector subtraction, treating overflow by signed saturation. */ 588 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 589 1.1 mrg psubsh (int16x4_t s, int16x4_t t) 590 1.1 mrg { 591 1.1 mrg return __builtin_loongson_psubsh (s, t); 592 1.1 mrg } 593 1.1 mrg 594 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 595 1.1 mrg psubsb (int8x8_t s, int8x8_t t) 596 1.1 mrg { 597 1.1 mrg return __builtin_loongson_psubsb (s, t); 598 1.1 mrg } 599 1.1 mrg 600 1.1 mrg /* Vector subtraction, treating overflow by unsigned saturation. */ 601 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 602 1.1 mrg psubush (uint16x4_t s, uint16x4_t t) 603 1.1 mrg { 604 1.1 mrg return __builtin_loongson_psubush (s, t); 605 1.1 mrg } 606 1.1 mrg 607 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 608 1.1 mrg psubusb (uint8x8_t s, uint8x8_t t) 609 1.1 mrg { 610 1.1 mrg return __builtin_loongson_psubusb (s, t); 611 1.1 mrg } 612 1.1 mrg 613 1.1 mrg /* Unpack high data. */ 614 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 615 1.1 mrg punpckhwd_u (uint32x2_t s, uint32x2_t t) 616 1.1 mrg { 617 1.1 mrg return __builtin_loongson_punpckhwd_u (s, t); 618 1.1 mrg } 619 1.1 mrg 620 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 621 1.1 mrg punpckhhw_u (uint16x4_t s, uint16x4_t t) 622 1.1 mrg { 623 1.1 mrg return __builtin_loongson_punpckhhw_u (s, t); 624 1.1 mrg } 625 1.1 mrg 626 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 627 1.1 mrg punpckhbh_u (uint8x8_t s, uint8x8_t t) 628 1.1 mrg { 629 1.1 mrg return __builtin_loongson_punpckhbh_u (s, t); 630 1.1 mrg } 631 1.1 mrg 632 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 633 1.1 mrg punpckhwd_s (int32x2_t s, int32x2_t t) 634 1.1 mrg { 635 1.1 mrg return __builtin_loongson_punpckhwd_s (s, t); 636 1.1 mrg } 637 1.1 mrg 638 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 639 1.1 mrg punpckhhw_s (int16x4_t s, int16x4_t t) 640 1.1 mrg { 641 1.1 mrg return __builtin_loongson_punpckhhw_s (s, t); 642 1.1 mrg } 643 1.1 mrg 644 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 645 1.1 mrg punpckhbh_s (int8x8_t s, int8x8_t t) 646 1.1 mrg { 647 1.1 mrg return __builtin_loongson_punpckhbh_s (s, t); 648 1.1 mrg } 649 1.1 mrg 650 1.1 mrg /* Unpack low data. */ 651 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 652 1.1 mrg punpcklwd_u (uint32x2_t s, uint32x2_t t) 653 1.1 mrg { 654 1.1 mrg return __builtin_loongson_punpcklwd_u (s, t); 655 1.1 mrg } 656 1.1 mrg 657 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 658 1.1 mrg punpcklhw_u (uint16x4_t s, uint16x4_t t) 659 1.1 mrg { 660 1.1 mrg return __builtin_loongson_punpcklhw_u (s, t); 661 1.1 mrg } 662 1.1 mrg 663 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 664 1.1 mrg punpcklbh_u (uint8x8_t s, uint8x8_t t) 665 1.1 mrg { 666 1.1 mrg return __builtin_loongson_punpcklbh_u (s, t); 667 1.1 mrg } 668 1.1 mrg 669 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 670 1.1 mrg punpcklwd_s (int32x2_t s, int32x2_t t) 671 1.1 mrg { 672 1.1 mrg return __builtin_loongson_punpcklwd_s (s, t); 673 1.1 mrg } 674 1.1 mrg 675 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 676 1.1 mrg punpcklhw_s (int16x4_t s, int16x4_t t) 677 1.1 mrg { 678 1.1 mrg return __builtin_loongson_punpcklhw_s (s, t); 679 1.1 mrg } 680 1.1 mrg 681 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 682 1.1 mrg punpcklbh_s (int8x8_t s, int8x8_t t) 683 1.1 mrg { 684 1.1 mrg return __builtin_loongson_punpcklbh_s (s, t); 685 1.1 mrg } 686 1.1 mrg 687 1.1 mrg #ifdef __cplusplus 688 1.1 mrg } 689 1.1 mrg #endif 690 1.1 mrg 691 1.1 mrg #endif 692