1/*-
2 * Copyright (c) 2025 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Nia Alarie.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/*
31 * This should be compatible with what was shipped with SunPro.
32 *
33 * VIS Instruction Set User's Manual
34 * Sun Microsystems
35 * Part Number: 805-1394-03
36 * May 2001
37 *
 * The version of the available VIS instructions can be detected
 * through the `machdep.vis` sysctl. A value of "0" means that no VIS
 * instructions are available. All SPARCv9 hardware should support
 * at least VIS 1, while VIS 2 requires UltraSPARC-III or newer.
42 *
43 * GCC needs -mvis for VIS, and -mvis2 for VIS 2. However, its
44 * builtins are incomplete and some cause problematic typing issues
45 * with Sun's API, so they're mostly avoided.
46 */
47
48#ifndef _VIS_PROTO_H
49#define _VIS_PROTO_H
50
51#ifdef __cplusplus
52extern "C" {
53#endif
54
55#include "vis_types.h"
56
/*
 * Attribute set placed in front of the inline wrappers below.
 * Per the GCC function-attribute documentation: __gnu_inline__ selects
 * GNU inline semantics, __always_inline__ requests inlining regardless
 * of optimization level, and __artificial__ marks the function as
 * artificial for debug-info purposes.
 */
#define _VISATTR \
	__attribute__((__gnu_inline__, __always_inline__, __artificial__))
59
60/* 4.6.1 Arithmetic - addition and subtraction */
61
62_VISATTR
63static __inline vis_d64
64vis_fpadd16(vis_d64 r1, vis_d64 r2)
65{
66	vis_d64 out;
67
68	__asm("fpadd16 %1,%2,%0"
69	    : "=f"(out)
70	    : "f"(r1), "f"(r2));
71	return out;
72}
73
74_VISATTR
75static __inline vis_d64
76vis_fpsub16(vis_d64 r1, vis_d64 r2)
77{
78	vis_d64 out;
79
80	__asm("fpsub16 %1,%2,%0"
81	    : "=f"(out)
82	    : "f"(r1), "f"(r2));
83	return out;
84}
85
86_VISATTR
87static __inline vis_d64
88vis_fpadd32(vis_d64 r1, vis_d64 r2)
89{
90	vis_d64 out;
91
92	__asm("fpadd32 %1,%2,%0"
93	    : "=f"(out)
94	    : "f"(r1), "f"(r2));
95	return out;
96}
97
98_VISATTR
99static __inline vis_d64
100vis_fpsub32(vis_d64 r1, vis_d64 r2)
101{
102	vis_d64 out;
103
104	__asm("fpsub32 %1,%2,%0"
105	    : "=f"(out)
106	    : "f"(r1), "f"(r2));
107	return out;
108}
109
110_VISATTR
111static __inline vis_f32
112vis_fpadd16s(vis_f32 r1, vis_f32 r2)
113{
114	vis_f32 out;
115
116	__asm("fpadd16s %1,%2,%0"
117	    : "=f"(out)
118	    : "f"(r1), "f"(r2));
119	return out;
120}
121
122_VISATTR
123static __inline vis_f32
124vis_fpsub16s(vis_f32 r1, vis_f32 r2)
125{
126	vis_f32 out;
127
128	__asm("fpsub16s %1,%2,%0"
129	    : "=f"(out)
130	    : "f"(r1), "f"(r2));
131	return out;
132}
133
134_VISATTR
135static __inline vis_f32
136vis_fpadd32s(vis_f32 r1, vis_f32 r2)
137{
138	vis_f32 out;
139
140	__asm("fpadd32s %1,%2,%0"
141	    : "=f"(out)
142	    : "f"(r1), "f"(r2));
143	return out;
144}
145
146_VISATTR
147static __inline vis_f32
148vis_fpsub32s(vis_f32 r1, vis_f32 r2)
149{
150	vis_f32 out;
151
152	__asm("fpsub32s %1,%2,%0"
153	    : "=f"(out)
154	    : "f"(r1), "f"(r2));
155	return out;
156}
157
158/* 4.7 Pixel formatting - packing */
159
160_VISATTR
161static __inline vis_f32
162vis_fpack16(vis_d64 r1)
163{
164	vis_f32 out;
165
166	__asm("fpack16 %1,%0"
167	    : "=f"(out)
168	    : "f"(r1));
169	return out;
170}
171
172_VISATTR
173static __inline vis_d64
174vis_fpack32(vis_d64 r1, vis_d64 r2)
175{
176	vis_d64 out;
177
178	__asm("fpack32 %1,%2,%0"
179	    : "=f"(out)
180	    : "f"(r1), "f"(r2));
181	return out;
182}
183
184_VISATTR
185static __inline vis_f32
186vis_fpackfix(vis_d64 r1)
187{
188	vis_f32 out;
189
190	__asm("fpackfix %1,%0"
191	    : "=f"(out)
192	    : "f"(r1));
193	return out;
194}
195
196_VISATTR
197static __inline vis_d64
198vis_fexpand(vis_f32 r1)
199{
200	vis_d64 out;
201
202	__asm("fexpand %1,%0"
203	    : "=f"(out)
204	    : "f"(r1));
205	return out;
206}
207
208_VISATTR
209static __inline vis_d64
210vis_fpmerge(vis_f32 r1, vis_f32 r2)
211{
212	vis_d64 out;
213
214	__asm("fpmerge %1,%2,%0"
215	    : "=f"(out)
216	    : "f"(r1), "f"(r2));
217	return out;
218}
219
220/* 4.7.6 Aligned address calculation */
221
222_VISATTR
223static __inline void *
224vis_alignaddr(void *addr, int offset)
225{
226#if defined(__VIS__) && defined(__GNUC__)
227	return __builtin_vis_alignaddr(addr, offset);
228#else
229	void *out;
230
231	__asm("alginaddr %1,%2,%0"
232	    : "=r"(out)
233	    : "r"(addr), "r"(offset));
234	return out;
235#endif
236}
237
238_VISATTR
239static __inline vis_d64
240vis_faligndata(vis_d64 hi, vis_d64 lo)
241{
242	vis_d64 out;
243
244	__asm("faligndata %1,%2,%0"
245	    : "=f"(out)
246	    : "f"(hi), "f"(lo));
247	return out;
248}
249
250/* 4.7.7 Edge handling */
251
252_VISATTR
253static __inline vis_s32
254vis_edge8(void *a1, void *a2)
255{
256#if defined(__VIS__) && defined(__GNUC__)
257	return __builtin_vis_edge8(a1, a2);
258#else
259	vis_s32 out;
260
261	__asm("edge8 %1,%2,%0"
262	    : "=r"(out)
263	    : "r"(a1), "f"(a2));
264	return out;
265#endif
266}
267
268_VISATTR
269static __inline vis_s32
270vis_edge16(void *a1, void *a2)
271{
272#if defined(__VIS__) && defined(__GNUC__)
273	return __builtin_vis_edge16(a1, a2);
274#else
275	vis_s32 out;
276
277	__asm("edge16 %1,%2,%0"
278	    : "=r"(out)
279	    : "r"(a1), "f"(a2));
280	return out;
281#endif
282}
283
284_VISATTR
285static __inline vis_s32
286vis_edge32(void *a1, void *a2)
287{
288#if defined(__VIS__) && defined(__GNUC__)
289	return __builtin_vis_edge32(a1, a2);
290#else
291	vis_s32 out;
292
293	__asm("edge32 %1,%2,%0"
294	    : "=r"(out)
295	    : "r"(a1), "f"(a2));
296	return out;
297#endif
298}
299
300_VISATTR
301static __inline vis_s32
302vis_edge8l(void *a1, void *a2)
303{
304#if defined(__VIS__) && defined(__GNUC__)
305	return __builtin_vis_edge8l(a1, a2);
306#else
307	vis_s32 out;
308
309	__asm("edge8l %1,%2,%0"
310	    : "=r"(out)
311	    : "r"(a1), "f"(a2));
312	return out;
313#endif
314}
315
316_VISATTR
317static __inline vis_s32
318vis_edge16l(void *a1, void *a2)
319{
320#if defined(__VIS__) && defined(__GNUC__)
321	return __builtin_vis_edge16l(a1, a2);
322#else
323	vis_s32 out;
324
325	__asm("edge16l %1,%2,%0"
326	    : "=r"(out)
327	    : "r"(a1), "f"(a2));
328	return out;
329#endif
330}
331
332_VISATTR
333static __inline vis_s32
334vis_edge32l(void *a1, void *a2)
335{
336#if defined(__VIS__) && defined(__GNUC__)
337	return __builtin_vis_edge32l(a1, a2);
338#else
339	vis_s32 out;
340
341	__asm("edge32l %1,%2,%0"
342	    : "=r"(out)
343	    : "r"(a1), "f"(a2));
344	return out;
345#endif
346}
347
348/* 4.9 Array coordinate translation */
349
350_VISATTR
351static __inline vis_addr
352_VISATTR
353vis_array8(vis_u64 d1, vis_s32 d2)
354{
355#if defined(__VIS__) && defined(__GNUC__)
356	return __builtin_vis_array8(d1, d2);
357#else
358	vis_addr out;
359
360	__asm("array8 %1,%2,%0"
361	    : "=r"(out)
362	    : "r"(d1), "f"(d2));
363	return out;
364#endif
365}
366
367_VISATTR
368static __inline vis_addr
369vis_array16(vis_u64 d1, vis_s32 d2)
370{
371#if defined(__VIS__) && defined(__GNUC__)
372	return __builtin_vis_array16(d1, d2);
373#else
374	vis_addr out;
375
376	__asm("array16 %1,%2,%0"
377	    : "=r"(out)
378	    : "r"(d1), "f"(d2));
379	return out;
380#endif
381}
382
383_VISATTR
384static __inline vis_addr
385vis_array32(vis_u64 d1, vis_s32 d2)
386{
387#if defined(__VIS__) && defined(__GNUC__)
388	return __builtin_vis_array32(d1, d2);
389#else
390	vis_addr out;
391
392	__asm("array32 %1,%2,%0"
393	    : "=r"(out)
394	    : "r"(d1), "r"(d2));
395	return out;
396#endif
397}
398
399/* 4.3.1 Graphics Status Register manipulation */
400
401_VISATTR
402static __inline vis_u64
403vis_read_gsr64(void)
404{
405#if defined(__VIS__) && defined(__GNUC__)
406	return __builtin_vis_read_gsr();
407#else
408	vis_u64 out;
409
410	__asm("rd %%gsr,%0"
411	    : "=r"(out));
412	return out;
413#endif
414}
415
416_VISATTR
417static __inline void
418vis_write_gsr64(vis_u64 gsr)
419{
420#if defined(__VIS__) && defined(__GNUC__)
421	__builtin_vis_write_gsr(gsr);
422#else
423	__asm("mov %0,%%gsr"
424	    :
425	    : "r"(gsr));
426#endif
427}
428
429_VISATTR
430static __inline vis_u32
431vis_read_gsr32(void)
432{
433	return vis_read_gsr64();
434}
435
436_VISATTR
437static __inline void
438vis_write_gsr32(vis_u32 gsr)
439{
440	vis_write_gsr64(gsr);
441}
442
443/* 4.3.2 Read and write to upper/lower components */
444
445_VISATTR
446static __inline vis_f32
447vis_read_hi(vis_d64 var)
448{
449	vis_u64 reg = *((vis_u64 *)&var);
450	vis_u32 hi = (reg >> 32) & 0xffffffff;
451	vis_f32 out = *((vis_f32 *)&hi);
452	return out;
453}
454
455_VISATTR
456static __inline vis_f32
457vis_read_lo(vis_d64 var)
458{
459	vis_u64 reg = *((vis_u64 *)&var);
460	vis_u32 lo = reg & 0xffffffff;
461	vis_f32 out = *((vis_f32 *)&lo);
462	return out;
463}
464
465_VISATTR
466static __inline vis_d64
467vis_write_lo(vis_d64 in, vis_f32 lower)
468{
469	vis_u64 out = *((vis_u64 *)&in);
470	vis_u32 hi = (out >> 32) & 0xffffffff;
471	vis_u32 lo = *((vis_u32 *)&lower);
472
473	out = ((vis_u64)hi << 32ULL) | lo;
474	return *((vis_d64 *)&out);
475}
476
477_VISATTR
478static __inline vis_d64
479vis_write_hi(vis_d64 in, vis_f32 upper)
480{
481	vis_u64 out = *((vis_u64 *)&in);
482	vis_u32 hi = *((vis_u32 *)&upper);
483	vis_u32 lo = out & 0xffffffff;
484
485	out = ((vis_u64)hi << 32ULL) | lo;
486	return *((vis_d64 *)&out);
487}
488
489/* 4.3.3 Join two variables into a single */
490
491_VISATTR
492static __inline vis_d64
493vis_freg_pair(vis_f32 f1, vis_f32 f2)
494{
495	vis_u64 out;
496	vis_u32 r1 = *((vis_u32 *)&f1);
497	vis_u32 r2 = *((vis_u32 *)&f2);
498
499	out = ((vis_u64)r1 << 32ULL) | r2;
500	return *((vis_d64 *)&out);
501}
502
503/* 4.3.4 Place ints into FP register */
504
505_VISATTR
506static __inline vis_f32
507vis_to_float(vis_u32 data)
508{
509	return *((vis_f32 *)&data);
510}
511
512_VISATTR
513static __inline vis_d64
514vis_to_double(vis_u32 d1, vis_u32 d2)
515{
516	vis_u64 out;
517
518	out = ((vis_u64)d1 << 32ULL) | d2;
519	return *((vis_d64 *)&out);
520}
521
522_VISATTR
523static __inline vis_d64
524vis_to_double_dup(vis_u32 data)
525{
526	return vis_to_double(data, data);
527}
528
529_VISATTR
530static __inline vis_d64
531vis_ll_to_double(vis_u64 data)
532{
533	return *((vis_d64 *)&data);
534}
535
536/* 4.6.2 Arithmetic - multiplication */
537
538_VISATTR
539static __inline vis_d64
540vis_fmul8x16(vis_f32 pixels, vis_d64 scale)
541{
542	vis_d64 out;
543
544	__asm("fmul8x16 %1,%2,%0"
545	    : "=f"(out)
546	    : "f"(pixels), "f"(scale));
547	return out;
548}
549
550_VISATTR
551static __inline vis_d64
552vis_fmul8x16au(vis_f32 pixels, vis_f32 scale)
553{
554	vis_d64 out;
555
556	__asm("fmul8x16au %1,%2,%0"
557	    : "=f"(out)
558	    : "f"(pixels), "f"(scale));
559	return out;
560}
561
562_VISATTR
563static __inline vis_d64
564vis_fmul8x16al(vis_f32 pixels, vis_f32 scale)
565{
566	vis_d64 out;
567
568	__asm("fmul8x16al %1,%2,%0"
569	    : "=f"(out)
570	    : "f"(pixels), "f"(scale));
571	return out;
572}
573
574_VISATTR
575static __inline vis_d64
576vis_fmul8sux16(vis_d64 d1, vis_d64 d2)
577{
578	vis_d64 out;
579
580	__asm("fmul8sux16 %1,%2,%0"
581	    : "=f"(out)
582	    : "f"(d1), "f"(d2));
583	return out;
584}
585
586_VISATTR
587static __inline vis_d64
588vis_fmul8ulx16(vis_d64 d1, vis_d64 d2)
589{
590	vis_d64 out;
591
592	__asm("fmul8ulx16 %1,%2,%0"
593	    : "=f"(out)
594	    : "f"(d1), "f"(d2));
595	return out;
596}
597
598_VISATTR
599static __inline vis_d64
600vis_fmuld8sux16(vis_f32 d1, vis_f32 d2)
601{
602	vis_d64 out;
603
604	__asm("fmuld8sux16 %1,%2,%0"
605	    : "=f"(out)
606	    : "f"(d1), "f"(d2));
607	return out;
608}
609
610_VISATTR
611static __inline vis_d64
612vis_fmuld8ulx16(vis_f32 d1, vis_f32 d2)
613{
614	vis_d64 out;
615
616	__asm("fmuld8ulx16 %1,%2,%0"
617	    : "=f"(out)
618	    : "f"(d1), "f"(d2));
619	return out;
620}
621
622/* 4.5 Pixel compare */
623
624_VISATTR
625static __inline int
626vis_fcmpgt16(vis_d64 d1, vis_d64 d2)
627{
628	int out;
629
630	__asm("fcmpgt16 %1,%2,%0"
631	    : "=r"(out)
632	    : "f"(d1), "f"(d2));
633	return out;
634}
635
636_VISATTR
637static __inline int
638vis_fcmple16(vis_d64 d1, vis_d64 d2)
639{
640	int out;
641
642	__asm("fcmple16 %1,%2,%0"
643	    : "=r"(out)
644	    : "f"(d1), "f"(d2));
645	return out;
646}
647
648_VISATTR
649static __inline int
650vis_fcmpeq16(vis_d64 d1, vis_d64 d2)
651{
652	int out;
653
654	__asm("fcmpeq16 %1,%2,%0"
655	    : "=r"(out)
656	    : "f"(d1), "f"(d2));
657	return out;
658}
659
660_VISATTR
661static __inline int
662vis_fcmpne16(vis_d64 d1, vis_d64 d2)
663{
664	int out;
665
666	__asm("fcmpne16 %1,%2,%0"
667	    : "=r"(out)
668	    : "f"(d1), "f"(d2));
669	return out;
670}
671
672_VISATTR
673static __inline int
674vis_fcmpgt32(vis_d64 d1, vis_d64 d2)
675{
676	int out;
677
678	__asm("fcmpgt32 %1,%2,%0"
679	    : "=r"(out)
680	    : "f"(d1), "f"(d2));
681	return out;
682}
683
684_VISATTR
685static __inline int
686vis_fcmple32(vis_d64 d1, vis_d64 d2)
687{
688	int out;
689
690	__asm("fcmple32 %1,%2,%0"
691	    : "=r"(out)
692	    : "f"(d1), "f"(d2));
693	return out;
694}
695
696_VISATTR
697static __inline int
698vis_fcmpeq32(vis_d64 d1, vis_d64 d2)
699{
700	int out;
701
702	__asm("fcmpeq32 %1,%2,%0"
703	    : "=r"(out)
704	    : "f"(d1), "f"(d2));
705	return out;
706}
707
708_VISATTR
709static __inline int
710vis_fcmpne32(vis_d64 d1, vis_d64 d2)
711{
712	int out;
713
714	__asm("fcmpne32 %1,%2,%0"
715	    : "=r"(out)
716	    : "f"(d1), "f"(d2));
717	return out;
718}
719
720_VISATTR
721static __inline int
722vis_fcmplt16(vis_d64 d1, vis_d64 d2)
723{
724	return vis_fcmpgt16(d2, d1);
725}
726
727_VISATTR
728static __inline int
729vis_fcmpge16(vis_d64 d1, vis_d64 d2)
730{
731	return vis_fcmple16(d2, d1);
732}
733
734_VISATTR
735static __inline int
736vis_fcmplt32(vis_d64 d1, vis_d64 d2)
737{
738	return vis_fcmpgt32(d2, d1);
739}
740
741_VISATTR
742static __inline int
743vis_fcmpge32(vis_d64 d1, vis_d64 d2)
744{
745	return vis_fcmple32(d2, d1);
746}
747
748/* 4.10 Pixel distance */
749
750_VISATTR
751static __inline vis_d64
752vis_pdist(vis_d64 pixels1, vis_d64 pixels2, vis_d64 acc)
753{
754	__asm("pdist %1,%2,%0"
755	    : "+f"(acc)
756	    : "f"(pixels1), "f"(pixels2));
757
758	return acc;
759}
760
761/* 4.4.1 Logical instructions - fill variables */
762
763_VISATTR
764static __inline vis_d64
765vis_fzero(void)
766{
767	vis_d64 out;
768
769	__asm("fzero %0"
770	    : "=f"(out));
771	return out;
772}
773
774_VISATTR
775static __inline vis_d64
776vis_fone(void)
777{
778	vis_d64 out;
779
780	__asm("fone %0"
781	    : "=f"(out));
782	return out;
783}
784
785_VISATTR
786static __inline vis_f32
787vis_fzeros(void)
788{
789	vis_f32 out;
790
791	__asm("fzeros %0"
792	    : "=f"(out));
793	return out;
794}
795
796_VISATTR
797static __inline vis_f32
798vis_fones(void)
799{
800	vis_f32 out;
801
802	__asm("fones %0"
803	    : "=f"(out));
804	return out;
805}
806
807/* 4.4.2 Logical instructions - copies and complements */
808
809_VISATTR
810static __inline vis_d64
811vis_fsrc(vis_d64 r1)
812{
813	vis_d64 out;
814
815	__asm("fsrc1 %1,%0"
816	    : "=f"(out)
817	    : "f"(r1));
818	return out;
819}
820
821_VISATTR
822static __inline vis_d64
823vis_fnot(vis_d64 r1)
824{
825	vis_d64 out;
826
827	__asm("fnot1 %1,%0"
828	    : "=f"(out)
829	    : "f"(r1));
830	return out;
831}
832
833_VISATTR
834static __inline vis_f32
835vis_fsrcs(vis_f32 r1)
836{
837	vis_f32 out;
838
839	__asm("fsrc1s %1,%0"
840	    : "=f"(out)
841	    : "f"(r1));
842	return out;
843}
844
845_VISATTR
846static __inline vis_f32
847vis_fnots(vis_f32 r1)
848{
849	vis_f32 out;
850
851	__asm("fnot1s %1,%0"
852	    : "=f"(out)
853	    : "f"(r1));
854	return out;
855}
856
/* 4.4.3 Logical instructions - bitwise */
858
859_VISATTR
860static __inline vis_d64
861vis_for(vis_d64 r1, vis_d64 r2)
862{
863	vis_d64 out;
864	__asm("for %1,%2,%0"
865	    : "=f"(out)
866	    : "f"(r1), "f"(r2));
867	return out;
868}
869
870_VISATTR
871static __inline vis_d64
872vis_fand(vis_d64 r1, vis_d64 r2)
873{
874	vis_d64 out;
875	__asm("fand %1,%2,%0"
876	    : "=f"(out)
877	    : "f"(r1), "f"(r2));
878	return out;
879}
880
881_VISATTR
882static __inline vis_d64
883vis_fxor(vis_d64 r1, vis_d64 r2)
884{
885	vis_d64 out;
886	__asm("fxor %1,%2,%0"
887	    : "=f"(out)
888	    : "f"(r1), "f"(r2));
889	return out;
890}
891
892_VISATTR
893static __inline vis_d64
894vis_fnor(vis_d64 r1, vis_d64 r2)
895{
896	vis_d64 out;
897	__asm("fnor %1,%2,%0"
898	    : "=f"(out)
899	    : "f"(r1), "f"(r2));
900	return out;
901}
902
903_VISATTR
904static __inline vis_d64
905vis_fnand(vis_d64 r1, vis_d64 r2)
906{
907	vis_d64 out;
908	__asm("fnand %1,%2,%0"
909	    : "=f"(out)
910	    : "f"(r1), "f"(r2));
911	return out;
912}
913
914_VISATTR
915static __inline vis_d64
916vis_fxnor(vis_d64 r1, vis_d64 r2)
917{
918	vis_d64 out;
919	__asm("fxnor %1,%2,%0"
920	    : "=f"(out)
921	    : "f"(r1), "f"(r2));
922	return out;
923}
924
925_VISATTR
926static __inline vis_d64
927vis_fornot(vis_d64 r1, vis_d64 r2)
928{
929	vis_d64 out;
930	__asm("fornot1 %1,%2,%0"
931	    : "=f"(out)
932	    : "f"(r1), "f"(r2));
933	return out;
934}
935
936_VISATTR
937static __inline vis_d64
938vis_fandnot(vis_d64 r1, vis_d64 r2)
939{
940	vis_d64 out;
941	__asm("fandnot1 %1,%2,%0"
942	    : "=f"(out)
943	    : "f"(r1), "f"(r2));
944	return out;
945}
946
947_VISATTR
948static __inline vis_f32
949vis_fors(vis_f32 r1, vis_f32 r2)
950{
951	vis_f32 out;
952	__asm("fors %1,%2,%0"
953	    : "=f"(out)
954	    : "f"(r1), "f"(r2));
955	return out;
956}
957
958_VISATTR
959static __inline vis_f32
960vis_fands(vis_f32 r1, vis_f32 r2)
961{
962	vis_f32 out;
963	__asm("fands %1,%2,%0"
964	    : "=f"(out)
965	    : "f"(r1), "f"(r2));
966	return out;
967}
968
969_VISATTR
970static __inline vis_f32
971vis_fxors(vis_f32 r1, vis_f32 r2)
972{
973	vis_f32 out;
974	__asm("fxors %1,%2,%0"
975	    : "=f"(out)
976	    : "f"(r1), "f"(r2));
977	return out;
978}
979
980_VISATTR
981static __inline vis_f32
982vis_fnors(vis_f32 r1, vis_f32 r2)
983{
984	vis_f32 out;
985	__asm("fnors %1,%2,%0"
986	    : "=f"(out)
987	    : "f"(r1), "f"(r2));
988	return out;
989}
990
991_VISATTR
992static __inline vis_f32
993vis_fnands(vis_f32 r1, vis_f32 r2)
994{
995	vis_f32 out;
996	__asm("fnands %1,%2,%0"
997	    : "=f"(out)
998	    : "f"(r1), "f"(r2));
999	return out;
1000}
1001
1002_VISATTR
1003static __inline vis_f32
1004vis_fxnors(vis_f32 r1, vis_f32 r2)
1005{
1006	vis_f32 out;
1007	__asm("fxnors %1,%2,%0"
1008	    : "=f"(out)
1009	    : "f"(r1), "f"(r2));
1010	return out;
1011}
1012
1013_VISATTR
1014static __inline vis_f32
1015vis_fornots(vis_f32 r1, vis_f32 r2)
1016{
1017	vis_f32 out;
1018	__asm("fornot1s %1,%2,%0"
1019	    : "=f"(out)
1020	    : "f"(r1), "f"(r2));
1021	return out;
1022}
1023
1024_VISATTR
1025static __inline vis_f32
1026vis_fandnots(vis_f32 r1, vis_f32 r2)
1027{
1028	vis_f32 out;
1029	__asm("fandnot1s %1,%2,%0"
1030	    : "=f"(out)
1031	    : "f"(r1), "f"(r2));
1032	return out;
1033}
1034
1035/* 4.8.1 Partial Stores */
1036
1037_VISATTR
1038static __inline void
1039vis_pst_8(vis_d64 data, void *addr, vis_u8 mask)
1040{
1041	__asm("stda %1,[%0]%2,0xc0"
1042	    : "=r"(addr)
1043	    : "f"(data), "r"(mask));
1044}
1045
1046_VISATTR
1047static __inline void
1048vis_pst_16(vis_d64 data, void *addr, vis_u8 mask)
1049{
1050	__asm("stda %1,[%0]%2,0xc2"
1051	    : "=r"(addr)
1052	    : "f"(data), "r"(mask));
1053}
1054
1055_VISATTR
1056static __inline void
1057vis_pst_32(vis_d64 data, void *addr, vis_u8 mask)
1058{
1059	__asm("stda %1,[%0]%2,0xc4"
1060	    : "=r"(addr)
1061	    : "f"(data), "r"(mask));
1062}
1063
1064/* 4.8.2 Byte/Short Loads and Stores */
1065
1066_VISATTR
1067static __inline void
1068vis_st_u8(vis_u64 data, void *addr)
1069{
1070	__asm("stda %1,[%0]0xd0"
1071	    : "=r"(addr)
1072	    : "f"(data));
1073}
1074
1075_VISATTR
1076static __inline void
1077vis_st_u8_le(vis_d64 data, void *addr)
1078{
1079	__asm("stda %1,[%0]0xd8"
1080	    : "=r"(addr)
1081	    : "f"(data));
1082}
1083
1084_VISATTR
1085static __inline void
1086vis_st_u16(vis_d64 data, void *addr)
1087{
1088	__asm("stda %1,[%0]0xd2"
1089	    : "=r"(addr)
1090	    : "f"(data));
1091}
1092
1093_VISATTR
1094static __inline void
1095vis_st_u16_le(vis_d64 data, void *addr)
1096{
1097	__asm("stda %1,[%0]0xda"
1098	    : "=r"(addr)
1099	    : "f"(data));
1100}
1101
1102_VISATTR
1103static __inline void
1104vis_st_u8_i(vis_d64 data, void *addr, long idx)
1105{
1106	vis_u8 *ptr = addr;
1107	vis_st_u8(data, ptr + idx);
1108}
1109
1110_VISATTR
1111static __inline void
1112vis_st_u16_i(vis_d64 data, void *addr, long idx)
1113{
1114	vis_u8 *ptr = addr;
1115	vis_st_u16(data, ptr + idx);
1116}
1117
1118_VISATTR
1119static __inline vis_d64
1120vis_ld_u8(void *addr)
1121{
1122	vis_u8 val;
1123	vis_d64 out;
1124
1125	val = *((vis_u8 *)addr);
1126	*((vis_u8 *)&out) = val;
1127
1128	return out;
1129}
1130
1131_VISATTR
1132static __inline vis_d64
1133vis_ld_u16(void *addr)
1134{
1135	vis_u16 val;
1136	vis_d64 out;
1137
1138	val = *((vis_u16 *)addr);
1139	*((vis_u16 *)&out) = val;
1140
1141	return out;
1142}
1143
1144_VISATTR
1145static __inline vis_d64
1146vis_ld_u8_i(void *addr, long idx)
1147{
1148	vis_u8 *ptr = addr;
1149	return vis_ld_u8(ptr + idx);
1150}
1151
1152_VISATTR
1153static __inline vis_d64
1154vis_ld_u16_i(void *addr, long idx)
1155{
1156	vis_u8 *ptr = addr;
1157	return vis_ld_u16(ptr + idx);
1158}
1159
1160/*
1161 * VIS 2.0 instructions
1162 */
1163
1164_VISATTR
1165static __inline vis_u32
1166vis_read_bmask(void)
1167{
1168	vis_u32 out;
1169
1170	__asm("rd %%gsr,%0"
1171	    "srlx %0,32,%0"
1172	    : "+f"(out));
1173	return out;
1174}
1175
1176_VISATTR
1177static __inline void
1178vis_write_bmask(vis_u32 mask1, vis_u32 mask2)
1179{
1180#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
1181	(void)__builtin_vis_bmask(mask1, mask2);
1182#else
1183	vis_u32 out;
1184
1185	__asm("bmask %1,%2,%0"
1186	    : "=r"(out)
1187	    : "r"(mask1), "r"(mask2));
1188
1189	(void)out;
1190#endif
1191}
1192
1193_VISATTR
1194static __inline vis_d64
1195vis_bshuffle(vis_d64 pixels1, vis_d64 pixels2)
1196{
1197	vis_d64 out;
1198
1199	__asm("bshuffle %1,%2,%0"
1200	    : "=f"(out)
1201	    : "f"(pixels1), "f"(pixels2));
1202	return out;
1203}
1204
1205_VISATTR
1206static __inline vis_s32
1207vis_edge8n(void *a1, void *a2)
1208{
1209#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
1210	return __builtin_vis_edge8n(a1, a2);
1211#else
1212	vis_s32 out;
1213
1214	__asm("edge8n %1,%2,%0"
1215	    : "=r"(out)
1216	    : "r"(a1), "r"(a2));
1217	return out;
1218#endif
1219}
1220
1221_VISATTR
1222static __inline vis_s32
1223vis_edge16n(void *a1, void *a2)
1224{
1225#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
1226	return __builtin_vis_edge16n(a1, a2);
1227#else
1228	vis_s32 out;
1229
1230	__asm("edge16n %1,%2,%0"
1231	    : "=r"(out)
1232	    : "r"(a1), "r"(a2));
1233	return out;
1234#endif
1235}
1236
1237_VISATTR
1238static __inline vis_s32
1239vis_edge32n(void *a1, void *a2)
1240{
1241#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
1242	return __builtin_vis_edge32n(a1, a2);
1243#else
1244	vis_s32 out;
1245
1246	__asm("edge32n %1,%2,%0"
1247	    : "=r"(out)
1248	    : "r"(a1), "r"(a2));
1249	return out;
1250#endif
1251}
1252
1253_VISATTR
1254static __inline vis_s32
1255vis_edge8ln(void *a1, void *a2)
1256{
1257#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
1258	return __builtin_vis_edge8ln(a1, a2);
1259#else
1260	vis_s32 out;
1261
1262	__asm("edge8ln %1,%2,%0"
1263	    : "=r"(out)
1264	    : "r"(a1), "r"(a2));
1265	return out;
1266#endif
1267}
1268
1269_VISATTR
1270static __inline vis_s32
1271vis_edge16ln(void *a1, void *a2)
1272{
1273#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
1274	return __builtin_vis_edge16ln(a1, a2);
1275#else
1276	vis_s32 out;
1277
1278	__asm("edge16ln %1,%2,%0"
1279	    : "=r"(out)
1280	    : "r"(a1), "r"(a2));
1281	return out;
1282#endif
1283}
1284
1285_VISATTR
1286static __inline vis_s32
1287vis_edge32ln(void *a1, void *a2)
1288{
1289#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
1290	return __builtin_vis_edge32ln(a1, a2);
1291#else
1292	vis_s32 out;
1293
1294	__asm("edge32ln %1,%2,%0"
1295	    : "=r"(out)
1296	    : "r"(a1), "r"(a2));
1297	return out;
1298#endif
1299}
1300
1301#ifdef __cplusplus
1302}
1303#endif
1304
1305#endif
1306