loongson-mmiintrin.h revision 1.1.1.3 1 1.1 mrg /* Intrinsics for Loongson MultiMedia extension Instructions operations.
2 1.1 mrg
3 1.1.1.3 mrg Copyright (C) 2008-2022 Free Software Foundation, Inc.
4 1.1 mrg Contributed by CodeSourcery.
5 1.1 mrg
6 1.1 mrg This file is part of GCC.
7 1.1 mrg
8 1.1 mrg GCC is free software; you can redistribute it and/or modify it
9 1.1 mrg under the terms of the GNU General Public License as published
10 1.1 mrg by the Free Software Foundation; either version 3, or (at your
11 1.1 mrg option) any later version.
12 1.1 mrg
13 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
14 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 1.1 mrg License for more details.
17 1.1 mrg
18 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
19 1.1 mrg permissions described in the GCC Runtime Library Exception, version
20 1.1 mrg 3.1, as published by the Free Software Foundation.
21 1.1 mrg
22 1.1 mrg You should have received a copy of the GNU General Public License and
23 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
24 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 1.1 mrg <http://www.gnu.org/licenses/>. */
26 1.1 mrg
27 1.1 mrg #ifndef _GCC_LOONGSON_MMIINTRIN_H
28 1.1 mrg #define _GCC_LOONGSON_MMIINTRIN_H
29 1.1 mrg
30 1.1 mrg #if !defined(__mips_loongson_mmi)
31 1.1 mrg # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
32 1.1 mrg loongson-mmiintrin.h
33 1.1 mrg #endif
34 1.1 mrg
35 1.1 mrg #ifdef __cplusplus
36 1.1 mrg extern "C" {
37 1.1 mrg #endif
38 1.1 mrg
39 1.1 mrg #include <stdint.h>
40 1.1 mrg
/* 8-byte vectors of unsigned bytes (8 lanes), halfwords (4 lanes) and
   words (2 lanes).  The reserved __vector_size__ spelling is used so
   that a user macro named vector_size cannot break this header.  */
typedef uint8_t uint8x8_t __attribute__ ((__vector_size__ (8)));
typedef uint16_t uint16x4_t __attribute__ ((__vector_size__ (8)));
typedef uint32_t uint32x2_t __attribute__ ((__vector_size__ (8)));
45 1.1 mrg
/* 8-byte vectors of signed bytes (8 lanes), halfwords (4 lanes) and
   words (2 lanes).  The reserved __vector_size__ spelling is used so
   that a user macro named vector_size cannot break this header.  */
typedef int8_t int8x8_t __attribute__ ((__vector_size__ (8)));
typedef int16_t int16x4_t __attribute__ ((__vector_size__ (8)));
typedef int32_t int32x2_t __attribute__ ((__vector_size__ (8)));
50 1.1 mrg
51 1.1 mrg /* SIMD intrinsics.
52 1.1 mrg Unless otherwise noted, calls to the functions below will expand into
53 1.1 mrg precisely one machine instruction, modulo any moves required to
54 1.1 mrg satisfy register allocation constraints. */
55 1.1 mrg
56 1.1 mrg /* Pack with signed saturation. */
57 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
58 1.1 mrg packsswh (int32x2_t s, int32x2_t t)
59 1.1 mrg {
60 1.1 mrg return __builtin_loongson_packsswh (s, t);
61 1.1 mrg }
62 1.1 mrg
63 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
64 1.1 mrg packsshb (int16x4_t s, int16x4_t t)
65 1.1 mrg {
66 1.1 mrg return __builtin_loongson_packsshb (s, t);
67 1.1 mrg }
68 1.1 mrg
69 1.1 mrg /* Pack with unsigned saturation. */
70 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
71 1.1 mrg packushb (uint16x4_t s, uint16x4_t t)
72 1.1 mrg {
73 1.1 mrg return __builtin_loongson_packushb (s, t);
74 1.1 mrg }
75 1.1 mrg
76 1.1 mrg /* Vector addition, treating overflow by wraparound. */
77 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
78 1.1 mrg paddw_u (uint32x2_t s, uint32x2_t t)
79 1.1 mrg {
80 1.1 mrg return __builtin_loongson_paddw_u (s, t);
81 1.1 mrg }
82 1.1 mrg
83 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
84 1.1 mrg paddh_u (uint16x4_t s, uint16x4_t t)
85 1.1 mrg {
86 1.1 mrg return __builtin_loongson_paddh_u (s, t);
87 1.1 mrg }
88 1.1 mrg
89 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
90 1.1 mrg paddb_u (uint8x8_t s, uint8x8_t t)
91 1.1 mrg {
92 1.1 mrg return __builtin_loongson_paddb_u (s, t);
93 1.1 mrg }
94 1.1 mrg
95 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
96 1.1 mrg paddw_s (int32x2_t s, int32x2_t t)
97 1.1 mrg {
98 1.1 mrg return __builtin_loongson_paddw_s (s, t);
99 1.1 mrg }
100 1.1 mrg
101 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
102 1.1 mrg paddh_s (int16x4_t s, int16x4_t t)
103 1.1 mrg {
104 1.1 mrg return __builtin_loongson_paddh_s (s, t);
105 1.1 mrg }
106 1.1 mrg
107 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
108 1.1 mrg paddb_s (int8x8_t s, int8x8_t t)
109 1.1 mrg {
110 1.1 mrg return __builtin_loongson_paddb_s (s, t);
111 1.1 mrg }
112 1.1 mrg
113 1.1 mrg /* Addition of doubleword integers, treating overflow by wraparound. */
/* Add the unsigned doublewords S and T; overflow wraps around.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
paddd_u (uint64_t s, uint64_t t)
{
  uint64_t sum = __builtin_loongson_paddd_u (s, t);
  return sum;
}
119 1.1 mrg
/* Add the signed doublewords S and T; overflow wraps around.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
paddd_s (int64_t s, int64_t t)
{
  int64_t sum = __builtin_loongson_paddd_s (s, t);
  return sum;
}
125 1.1 mrg
126 1.1 mrg /* Vector addition, treating overflow by signed saturation. */
127 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
128 1.1 mrg paddsh (int16x4_t s, int16x4_t t)
129 1.1 mrg {
130 1.1 mrg return __builtin_loongson_paddsh (s, t);
131 1.1 mrg }
132 1.1 mrg
133 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
134 1.1 mrg paddsb (int8x8_t s, int8x8_t t)
135 1.1 mrg {
136 1.1 mrg return __builtin_loongson_paddsb (s, t);
137 1.1 mrg }
138 1.1 mrg
139 1.1 mrg /* Vector addition, treating overflow by unsigned saturation. */
140 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
141 1.1 mrg paddush (uint16x4_t s, uint16x4_t t)
142 1.1 mrg {
143 1.1 mrg return __builtin_loongson_paddush (s, t);
144 1.1 mrg }
145 1.1 mrg
146 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
147 1.1 mrg paddusb (uint8x8_t s, uint8x8_t t)
148 1.1 mrg {
149 1.1 mrg return __builtin_loongson_paddusb (s, t);
150 1.1 mrg }
151 1.1 mrg
152 1.1 mrg /* Logical AND NOT. */
/* Logical AND NOT of the unsigned doublewords S and T, as computed by
   __builtin_loongson_pandn_ud.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
pandn_ud (uint64_t s, uint64_t t)
{
  uint64_t masked = __builtin_loongson_pandn_ud (s, t);
  return masked;
}
158 1.1 mrg
159 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
160 1.1 mrg pandn_uw (uint32x2_t s, uint32x2_t t)
161 1.1 mrg {
162 1.1 mrg return __builtin_loongson_pandn_uw (s, t);
163 1.1 mrg }
164 1.1 mrg
165 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
166 1.1 mrg pandn_uh (uint16x4_t s, uint16x4_t t)
167 1.1 mrg {
168 1.1 mrg return __builtin_loongson_pandn_uh (s, t);
169 1.1 mrg }
170 1.1 mrg
171 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
172 1.1 mrg pandn_ub (uint8x8_t s, uint8x8_t t)
173 1.1 mrg {
174 1.1 mrg return __builtin_loongson_pandn_ub (s, t);
175 1.1 mrg }
176 1.1 mrg
/* Logical AND NOT of the signed doublewords S and T, as computed by
   __builtin_loongson_pandn_sd.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pandn_sd (int64_t s, int64_t t)
{
  int64_t masked = __builtin_loongson_pandn_sd (s, t);
  return masked;
}
182 1.1 mrg
183 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
184 1.1 mrg pandn_sw (int32x2_t s, int32x2_t t)
185 1.1 mrg {
186 1.1 mrg return __builtin_loongson_pandn_sw (s, t);
187 1.1 mrg }
188 1.1 mrg
189 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
190 1.1 mrg pandn_sh (int16x4_t s, int16x4_t t)
191 1.1 mrg {
192 1.1 mrg return __builtin_loongson_pandn_sh (s, t);
193 1.1 mrg }
194 1.1 mrg
195 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
196 1.1 mrg pandn_sb (int8x8_t s, int8x8_t t)
197 1.1 mrg {
198 1.1 mrg return __builtin_loongson_pandn_sb (s, t);
199 1.1 mrg }
200 1.1 mrg
201 1.1 mrg /* Average. */
202 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
203 1.1 mrg pavgh (uint16x4_t s, uint16x4_t t)
204 1.1 mrg {
205 1.1 mrg return __builtin_loongson_pavgh (s, t);
206 1.1 mrg }
207 1.1 mrg
208 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
209 1.1 mrg pavgb (uint8x8_t s, uint8x8_t t)
210 1.1 mrg {
211 1.1 mrg return __builtin_loongson_pavgb (s, t);
212 1.1 mrg }
213 1.1 mrg
214 1.1 mrg /* Equality test. */
215 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
216 1.1 mrg pcmpeqw_u (uint32x2_t s, uint32x2_t t)
217 1.1 mrg {
218 1.1 mrg return __builtin_loongson_pcmpeqw_u (s, t);
219 1.1 mrg }
220 1.1 mrg
221 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
222 1.1 mrg pcmpeqh_u (uint16x4_t s, uint16x4_t t)
223 1.1 mrg {
224 1.1 mrg return __builtin_loongson_pcmpeqh_u (s, t);
225 1.1 mrg }
226 1.1 mrg
227 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
228 1.1 mrg pcmpeqb_u (uint8x8_t s, uint8x8_t t)
229 1.1 mrg {
230 1.1 mrg return __builtin_loongson_pcmpeqb_u (s, t);
231 1.1 mrg }
232 1.1 mrg
233 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
234 1.1 mrg pcmpeqw_s (int32x2_t s, int32x2_t t)
235 1.1 mrg {
236 1.1 mrg return __builtin_loongson_pcmpeqw_s (s, t);
237 1.1 mrg }
238 1.1 mrg
239 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
240 1.1 mrg pcmpeqh_s (int16x4_t s, int16x4_t t)
241 1.1 mrg {
242 1.1 mrg return __builtin_loongson_pcmpeqh_s (s, t);
243 1.1 mrg }
244 1.1 mrg
245 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
246 1.1 mrg pcmpeqb_s (int8x8_t s, int8x8_t t)
247 1.1 mrg {
248 1.1 mrg return __builtin_loongson_pcmpeqb_s (s, t);
249 1.1 mrg }
250 1.1 mrg
251 1.1 mrg /* Greater-than test. */
252 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
253 1.1 mrg pcmpgtw_u (uint32x2_t s, uint32x2_t t)
254 1.1 mrg {
255 1.1 mrg return __builtin_loongson_pcmpgtw_u (s, t);
256 1.1 mrg }
257 1.1 mrg
258 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
259 1.1 mrg pcmpgth_u (uint16x4_t s, uint16x4_t t)
260 1.1 mrg {
261 1.1 mrg return __builtin_loongson_pcmpgth_u (s, t);
262 1.1 mrg }
263 1.1 mrg
264 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
265 1.1 mrg pcmpgtb_u (uint8x8_t s, uint8x8_t t)
266 1.1 mrg {
267 1.1 mrg return __builtin_loongson_pcmpgtb_u (s, t);
268 1.1 mrg }
269 1.1 mrg
270 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
271 1.1 mrg pcmpgtw_s (int32x2_t s, int32x2_t t)
272 1.1 mrg {
273 1.1 mrg return __builtin_loongson_pcmpgtw_s (s, t);
274 1.1 mrg }
275 1.1 mrg
276 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
277 1.1 mrg pcmpgth_s (int16x4_t s, int16x4_t t)
278 1.1 mrg {
279 1.1 mrg return __builtin_loongson_pcmpgth_s (s, t);
280 1.1 mrg }
281 1.1 mrg
282 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
283 1.1 mrg pcmpgtb_s (int8x8_t s, int8x8_t t)
284 1.1 mrg {
285 1.1 mrg return __builtin_loongson_pcmpgtb_s (s, t);
286 1.1 mrg }
287 1.1 mrg
288 1.1 mrg /* Extract halfword. */
289 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
290 1.1 mrg pextrh_u (uint16x4_t s, int field /* 0--3. */)
291 1.1 mrg {
292 1.1 mrg return __builtin_loongson_pextrh_u (s, field);
293 1.1 mrg }
294 1.1 mrg
295 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
296 1.1 mrg pextrh_s (int16x4_t s, int field /* 0--3. */)
297 1.1 mrg {
298 1.1 mrg return __builtin_loongson_pextrh_s (s, field);
299 1.1 mrg }
300 1.1 mrg
301 1.1 mrg /* Insert halfword. */
302 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
303 1.1 mrg pinsrh_0_u (uint16x4_t s, uint16x4_t t)
304 1.1 mrg {
305 1.1 mrg return __builtin_loongson_pinsrh_0_u (s, t);
306 1.1 mrg }
307 1.1 mrg
308 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
309 1.1 mrg pinsrh_1_u (uint16x4_t s, uint16x4_t t)
310 1.1 mrg {
311 1.1 mrg return __builtin_loongson_pinsrh_1_u (s, t);
312 1.1 mrg }
313 1.1 mrg
314 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
315 1.1 mrg pinsrh_2_u (uint16x4_t s, uint16x4_t t)
316 1.1 mrg {
317 1.1 mrg return __builtin_loongson_pinsrh_2_u (s, t);
318 1.1 mrg }
319 1.1 mrg
320 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
321 1.1 mrg pinsrh_3_u (uint16x4_t s, uint16x4_t t)
322 1.1 mrg {
323 1.1 mrg return __builtin_loongson_pinsrh_3_u (s, t);
324 1.1 mrg }
325 1.1 mrg
326 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
327 1.1 mrg pinsrh_0_s (int16x4_t s, int16x4_t t)
328 1.1 mrg {
329 1.1 mrg return __builtin_loongson_pinsrh_0_s (s, t);
330 1.1 mrg }
331 1.1 mrg
332 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
333 1.1 mrg pinsrh_1_s (int16x4_t s, int16x4_t t)
334 1.1 mrg {
335 1.1 mrg return __builtin_loongson_pinsrh_1_s (s, t);
336 1.1 mrg }
337 1.1 mrg
338 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
339 1.1 mrg pinsrh_2_s (int16x4_t s, int16x4_t t)
340 1.1 mrg {
341 1.1 mrg return __builtin_loongson_pinsrh_2_s (s, t);
342 1.1 mrg }
343 1.1 mrg
344 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
345 1.1 mrg pinsrh_3_s (int16x4_t s, int16x4_t t)
346 1.1 mrg {
347 1.1 mrg return __builtin_loongson_pinsrh_3_s (s, t);
348 1.1 mrg }
349 1.1 mrg
350 1.1 mrg /* Multiply and add. */
351 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
352 1.1 mrg pmaddhw (int16x4_t s, int16x4_t t)
353 1.1 mrg {
354 1.1 mrg return __builtin_loongson_pmaddhw (s, t);
355 1.1 mrg }
356 1.1 mrg
357 1.1 mrg /* Maximum of signed halfwords. */
358 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
359 1.1 mrg pmaxsh (int16x4_t s, int16x4_t t)
360 1.1 mrg {
361 1.1 mrg return __builtin_loongson_pmaxsh (s, t);
362 1.1 mrg }
363 1.1 mrg
364 1.1 mrg /* Maximum of unsigned bytes. */
365 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
366 1.1 mrg pmaxub (uint8x8_t s, uint8x8_t t)
367 1.1 mrg {
368 1.1 mrg return __builtin_loongson_pmaxub (s, t);
369 1.1 mrg }
370 1.1 mrg
371 1.1 mrg /* Minimum of signed halfwords. */
372 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
373 1.1 mrg pminsh (int16x4_t s, int16x4_t t)
374 1.1 mrg {
375 1.1 mrg return __builtin_loongson_pminsh (s, t);
376 1.1 mrg }
377 1.1 mrg
378 1.1 mrg /* Minimum of unsigned bytes. */
379 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
380 1.1 mrg pminub (uint8x8_t s, uint8x8_t t)
381 1.1 mrg {
382 1.1 mrg return __builtin_loongson_pminub (s, t);
383 1.1 mrg }
384 1.1 mrg
385 1.1 mrg /* Move byte mask. */
386 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
387 1.1 mrg pmovmskb_u (uint8x8_t s)
388 1.1 mrg {
389 1.1 mrg return __builtin_loongson_pmovmskb_u (s);
390 1.1 mrg }
391 1.1 mrg
392 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
393 1.1 mrg pmovmskb_s (int8x8_t s)
394 1.1 mrg {
395 1.1 mrg return __builtin_loongson_pmovmskb_s (s);
396 1.1 mrg }
397 1.1 mrg
398 1.1 mrg /* Multiply unsigned integers and store high result. */
399 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
400 1.1 mrg pmulhuh (uint16x4_t s, uint16x4_t t)
401 1.1 mrg {
402 1.1 mrg return __builtin_loongson_pmulhuh (s, t);
403 1.1 mrg }
404 1.1 mrg
405 1.1 mrg /* Multiply signed integers and store high result. */
406 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
407 1.1 mrg pmulhh (int16x4_t s, int16x4_t t)
408 1.1 mrg {
409 1.1 mrg return __builtin_loongson_pmulhh (s, t);
410 1.1 mrg }
411 1.1 mrg
412 1.1 mrg /* Multiply signed integers and store low result. */
413 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
414 1.1 mrg pmullh (int16x4_t s, int16x4_t t)
415 1.1 mrg {
416 1.1 mrg return __builtin_loongson_pmullh (s, t);
417 1.1 mrg }
418 1.1 mrg
419 1.1 mrg /* Multiply unsigned word integers. */
420 1.1 mrg __extension__ static __inline int64_t __attribute__ ((__always_inline__))
421 1.1 mrg pmuluw (uint32x2_t s, uint32x2_t t)
422 1.1 mrg {
423 1.1 mrg return __builtin_loongson_pmuluw (s, t);
424 1.1 mrg }
425 1.1 mrg
426 1.1 mrg /* Absolute difference. */
427 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
428 1.1 mrg pasubub (uint8x8_t s, uint8x8_t t)
429 1.1 mrg {
430 1.1 mrg return __builtin_loongson_pasubub (s, t);
431 1.1 mrg }
432 1.1 mrg
433 1.1 mrg /* Sum of unsigned byte integers. */
434 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
435 1.1 mrg biadd (uint8x8_t s)
436 1.1 mrg {
437 1.1 mrg return __builtin_loongson_biadd (s);
438 1.1 mrg }
439 1.1 mrg
440 1.1 mrg /* Sum of absolute differences.
441 1.1 mrg Note that this intrinsic expands into two machine instructions:
442 1.1 mrg PASUBUB followed by BIADD. */
443 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
444 1.1 mrg psadbh (uint8x8_t s, uint8x8_t t)
445 1.1 mrg {
446 1.1 mrg return __builtin_loongson_psadbh (s, t);
447 1.1 mrg }
448 1.1 mrg
449 1.1 mrg /* Shuffle halfwords. */
450 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
451 1.1 mrg pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
452 1.1 mrg {
453 1.1 mrg return __builtin_loongson_pshufh_u (s, order);
454 1.1 mrg }
455 1.1 mrg
456 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
457 1.1 mrg pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
458 1.1 mrg {
459 1.1 mrg return __builtin_loongson_pshufh_s (s, order);
460 1.1 mrg }
461 1.1 mrg
462 1.1 mrg /* Shift left logical. */
463 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
464 1.1 mrg psllh_u (uint16x4_t s, uint8_t amount)
465 1.1 mrg {
466 1.1 mrg return __builtin_loongson_psllh_u (s, amount);
467 1.1 mrg }
468 1.1 mrg
469 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
470 1.1 mrg psllh_s (int16x4_t s, uint8_t amount)
471 1.1 mrg {
472 1.1 mrg return __builtin_loongson_psllh_s (s, amount);
473 1.1 mrg }
474 1.1 mrg
475 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
476 1.1 mrg psllw_u (uint32x2_t s, uint8_t amount)
477 1.1 mrg {
478 1.1 mrg return __builtin_loongson_psllw_u (s, amount);
479 1.1 mrg }
480 1.1 mrg
481 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
482 1.1 mrg psllw_s (int32x2_t s, uint8_t amount)
483 1.1 mrg {
484 1.1 mrg return __builtin_loongson_psllw_s (s, amount);
485 1.1 mrg }
486 1.1 mrg
487 1.1 mrg /* Shift right logical. */
488 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
489 1.1 mrg psrlh_u (uint16x4_t s, uint8_t amount)
490 1.1 mrg {
491 1.1 mrg return __builtin_loongson_psrlh_u (s, amount);
492 1.1 mrg }
493 1.1 mrg
494 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
495 1.1 mrg psrlh_s (int16x4_t s, uint8_t amount)
496 1.1 mrg {
497 1.1 mrg return __builtin_loongson_psrlh_s (s, amount);
498 1.1 mrg }
499 1.1 mrg
500 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
501 1.1 mrg psrlw_u (uint32x2_t s, uint8_t amount)
502 1.1 mrg {
503 1.1 mrg return __builtin_loongson_psrlw_u (s, amount);
504 1.1 mrg }
505 1.1 mrg
506 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
507 1.1 mrg psrlw_s (int32x2_t s, uint8_t amount)
508 1.1 mrg {
509 1.1 mrg return __builtin_loongson_psrlw_s (s, amount);
510 1.1 mrg }
511 1.1 mrg
512 1.1 mrg /* Shift right arithmetic. */
513 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
514 1.1 mrg psrah_u (uint16x4_t s, uint8_t amount)
515 1.1 mrg {
516 1.1 mrg return __builtin_loongson_psrah_u (s, amount);
517 1.1 mrg }
518 1.1 mrg
519 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
520 1.1 mrg psrah_s (int16x4_t s, uint8_t amount)
521 1.1 mrg {
522 1.1 mrg return __builtin_loongson_psrah_s (s, amount);
523 1.1 mrg }
524 1.1 mrg
525 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
526 1.1 mrg psraw_u (uint32x2_t s, uint8_t amount)
527 1.1 mrg {
528 1.1 mrg return __builtin_loongson_psraw_u (s, amount);
529 1.1 mrg }
530 1.1 mrg
531 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
532 1.1 mrg psraw_s (int32x2_t s, uint8_t amount)
533 1.1 mrg {
534 1.1 mrg return __builtin_loongson_psraw_s (s, amount);
535 1.1 mrg }
536 1.1 mrg
537 1.1 mrg /* Vector subtraction, treating overflow by wraparound. */
538 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
539 1.1 mrg psubw_u (uint32x2_t s, uint32x2_t t)
540 1.1 mrg {
541 1.1 mrg return __builtin_loongson_psubw_u (s, t);
542 1.1 mrg }
543 1.1 mrg
544 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
545 1.1 mrg psubh_u (uint16x4_t s, uint16x4_t t)
546 1.1 mrg {
547 1.1 mrg return __builtin_loongson_psubh_u (s, t);
548 1.1 mrg }
549 1.1 mrg
550 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
551 1.1 mrg psubb_u (uint8x8_t s, uint8x8_t t)
552 1.1 mrg {
553 1.1 mrg return __builtin_loongson_psubb_u (s, t);
554 1.1 mrg }
555 1.1 mrg
556 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
557 1.1 mrg psubw_s (int32x2_t s, int32x2_t t)
558 1.1 mrg {
559 1.1 mrg return __builtin_loongson_psubw_s (s, t);
560 1.1 mrg }
561 1.1 mrg
562 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
563 1.1 mrg psubh_s (int16x4_t s, int16x4_t t)
564 1.1 mrg {
565 1.1 mrg return __builtin_loongson_psubh_s (s, t);
566 1.1 mrg }
567 1.1 mrg
568 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
569 1.1 mrg psubb_s (int8x8_t s, int8x8_t t)
570 1.1 mrg {
571 1.1 mrg return __builtin_loongson_psubb_s (s, t);
572 1.1 mrg }
573 1.1 mrg
574 1.1 mrg /* Subtraction of doubleword integers, treating overflow by wraparound. */
/* Subtract the unsigned doublewords S and T; overflow wraps around.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
psubd_u (uint64_t s, uint64_t t)
{
  uint64_t diff = __builtin_loongson_psubd_u (s, t);
  return diff;
}
580 1.1 mrg
/* Subtract the signed doublewords S and T; overflow wraps around.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
psubd_s (int64_t s, int64_t t)
{
  int64_t diff = __builtin_loongson_psubd_s (s, t);
  return diff;
}
586 1.1 mrg
587 1.1 mrg /* Vector subtraction, treating overflow by signed saturation. */
588 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
589 1.1 mrg psubsh (int16x4_t s, int16x4_t t)
590 1.1 mrg {
591 1.1 mrg return __builtin_loongson_psubsh (s, t);
592 1.1 mrg }
593 1.1 mrg
594 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
595 1.1 mrg psubsb (int8x8_t s, int8x8_t t)
596 1.1 mrg {
597 1.1 mrg return __builtin_loongson_psubsb (s, t);
598 1.1 mrg }
599 1.1 mrg
600 1.1 mrg /* Vector subtraction, treating overflow by unsigned saturation. */
601 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
602 1.1 mrg psubush (uint16x4_t s, uint16x4_t t)
603 1.1 mrg {
604 1.1 mrg return __builtin_loongson_psubush (s, t);
605 1.1 mrg }
606 1.1 mrg
607 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
608 1.1 mrg psubusb (uint8x8_t s, uint8x8_t t)
609 1.1 mrg {
610 1.1 mrg return __builtin_loongson_psubusb (s, t);
611 1.1 mrg }
612 1.1 mrg
613 1.1 mrg /* Unpack high data. */
614 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
615 1.1 mrg punpckhwd_u (uint32x2_t s, uint32x2_t t)
616 1.1 mrg {
617 1.1 mrg return __builtin_loongson_punpckhwd_u (s, t);
618 1.1 mrg }
619 1.1 mrg
620 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
621 1.1 mrg punpckhhw_u (uint16x4_t s, uint16x4_t t)
622 1.1 mrg {
623 1.1 mrg return __builtin_loongson_punpckhhw_u (s, t);
624 1.1 mrg }
625 1.1 mrg
626 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
627 1.1 mrg punpckhbh_u (uint8x8_t s, uint8x8_t t)
628 1.1 mrg {
629 1.1 mrg return __builtin_loongson_punpckhbh_u (s, t);
630 1.1 mrg }
631 1.1 mrg
632 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
633 1.1 mrg punpckhwd_s (int32x2_t s, int32x2_t t)
634 1.1 mrg {
635 1.1 mrg return __builtin_loongson_punpckhwd_s (s, t);
636 1.1 mrg }
637 1.1 mrg
638 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
639 1.1 mrg punpckhhw_s (int16x4_t s, int16x4_t t)
640 1.1 mrg {
641 1.1 mrg return __builtin_loongson_punpckhhw_s (s, t);
642 1.1 mrg }
643 1.1 mrg
644 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
645 1.1 mrg punpckhbh_s (int8x8_t s, int8x8_t t)
646 1.1 mrg {
647 1.1 mrg return __builtin_loongson_punpckhbh_s (s, t);
648 1.1 mrg }
649 1.1 mrg
650 1.1 mrg /* Unpack low data. */
651 1.1 mrg __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
652 1.1 mrg punpcklwd_u (uint32x2_t s, uint32x2_t t)
653 1.1 mrg {
654 1.1 mrg return __builtin_loongson_punpcklwd_u (s, t);
655 1.1 mrg }
656 1.1 mrg
657 1.1 mrg __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
658 1.1 mrg punpcklhw_u (uint16x4_t s, uint16x4_t t)
659 1.1 mrg {
660 1.1 mrg return __builtin_loongson_punpcklhw_u (s, t);
661 1.1 mrg }
662 1.1 mrg
663 1.1 mrg __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
664 1.1 mrg punpcklbh_u (uint8x8_t s, uint8x8_t t)
665 1.1 mrg {
666 1.1 mrg return __builtin_loongson_punpcklbh_u (s, t);
667 1.1 mrg }
668 1.1 mrg
669 1.1 mrg __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
670 1.1 mrg punpcklwd_s (int32x2_t s, int32x2_t t)
671 1.1 mrg {
672 1.1 mrg return __builtin_loongson_punpcklwd_s (s, t);
673 1.1 mrg }
674 1.1 mrg
675 1.1 mrg __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
676 1.1 mrg punpcklhw_s (int16x4_t s, int16x4_t t)
677 1.1 mrg {
678 1.1 mrg return __builtin_loongson_punpcklhw_s (s, t);
679 1.1 mrg }
680 1.1 mrg
681 1.1 mrg __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
682 1.1 mrg punpcklbh_s (int8x8_t s, int8x8_t t)
683 1.1 mrg {
684 1.1 mrg return __builtin_loongson_punpcklbh_s (s, t);
685 1.1 mrg }
686 1.1 mrg
687 1.1 mrg #ifdef __cplusplus
688 1.1 mrg }
689 1.1 mrg #endif
690 1.1 mrg
691 1.1 mrg #endif
692