vis_proto.h revision 1.1
11.1Snia/*-
21.1Snia * Copyright (c) 2025 The NetBSD Foundation, Inc.
31.1Snia * All rights reserved.
41.1Snia *
51.1Snia * This code is derived from software contributed to The NetBSD Foundation
61.1Snia * by Nia Alarie.
71.1Snia *
81.1Snia * Redistribution and use in source and binary forms, with or without
91.1Snia * modification, are permitted provided that the following conditions
101.1Snia * are met:
111.1Snia * 1. Redistributions of source code must retain the above copyright
121.1Snia *    notice, this list of conditions and the following disclaimer.
131.1Snia * 2. Redistributions in binary form must reproduce the above copyright
141.1Snia *    notice, this list of conditions and the following disclaimer in the
151.1Snia *    documentation and/or other materials provided with the distribution.
161.1Snia *
171.1Snia * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
181.1Snia * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
191.1Snia * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
201.1Snia * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
211.1Snia * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
221.1Snia * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
231.1Snia * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
241.1Snia * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
251.1Snia * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
261.1Snia * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
271.1Snia * POSSIBILITY OF SUCH DAMAGE.
281.1Snia */
291.1Snia
301.1Snia/*
311.1Snia * This should be compatible with what was shipped with SunPro.
321.1Snia *
331.1Snia * VIS Instruction Set User's Manual
341.1Snia * Sun Microsystems
351.1Snia * Part Number: 805-1394-03
361.1Snia * May 2001
371.1Snia *
381.1Snia * Version of available VIS instructions can be detected through
391.1Snia * the `machdep.vis` sysctl. A value of "0" means that such
401.1Snia * instructions are unavailable. All SPARCv9 hardware should support
411.1Snia * at least VIS 1, while VIS 2 requires UltraSPARC-III or newer.
421.1Snia *
431.1Snia * GCC needs -mvis for VIS, and -mvis2 for VIS 2. However, its
441.1Snia * builtins are incomplete and some cause problematic typing issues
451.1Snia * with Sun's API, so they're mostly avoided.
461.1Snia */
471.1Snia
481.1Snia#ifndef _VIS_PROTO_H
491.1Snia#define _VIS_PROTO_H
501.1Snia
511.1Snia#ifdef __cplusplus
521.1Sniaextern "C" {
531.1Snia#endif
541.1Snia
551.1Snia#include "vis_types.h"
561.1Snia
/*
 * Attribute set placed on every wrapper in this header: GNU inline
 * semantics, forced inlining, and the "artificial" marker.
 */
#define _VISATTR \
	__attribute__((__gnu_inline__, __always_inline__, __artificial__))
591.1Snia
601.1Snia/* 4.6.1 Arithmetic - addition and subtraction */
611.1Snia
621.1Snia_VISATTR
631.1Sniastatic __inline vis_d64
641.1Sniavis_fpadd16(vis_d64 r1, vis_d64 r2)
651.1Snia{
661.1Snia	vis_d64 out;
671.1Snia
681.1Snia	__asm("fpadd16 %1,%2,%0"
691.1Snia	    : "=f"(out)
701.1Snia	    : "f"(r1), "f"(r2));
711.1Snia	return out;
721.1Snia}
731.1Snia
741.1Snia_VISATTR
751.1Sniastatic __inline vis_d64
761.1Sniavis_fpsub16(vis_d64 r1, vis_d64 r2)
771.1Snia{
781.1Snia	vis_d64 out;
791.1Snia
801.1Snia	__asm("fpsub16 %1,%2,%0"
811.1Snia	    : "=f"(out)
821.1Snia	    : "f"(r1), "f"(r2));
831.1Snia	return out;
841.1Snia}
851.1Snia
861.1Snia_VISATTR
871.1Sniastatic __inline vis_d64
881.1Sniavis_fpadd32(vis_d64 r1, vis_d64 r2)
891.1Snia{
901.1Snia	vis_d64 out;
911.1Snia
921.1Snia	__asm("fpadd32 %1,%2,%0"
931.1Snia	    : "=f"(out)
941.1Snia	    : "f"(r1), "f"(r2));
951.1Snia	return out;
961.1Snia}
971.1Snia
981.1Snia_VISATTR
991.1Sniastatic __inline vis_d64
1001.1Sniavis_fpsub32(vis_d64 r1, vis_d64 r2)
1011.1Snia{
1021.1Snia	vis_d64 out;
1031.1Snia
1041.1Snia	__asm("fpsub32 %1,%2,%0"
1051.1Snia	    : "=f"(out)
1061.1Snia	    : "f"(r1), "f"(r2));
1071.1Snia	return out;
1081.1Snia}
1091.1Snia
1101.1Snia_VISATTR
1111.1Sniastatic __inline vis_f32
1121.1Sniavis_fpadd16s(vis_f32 r1, vis_f32 r2)
1131.1Snia{
1141.1Snia	vis_f32 out;
1151.1Snia
1161.1Snia	__asm("fpadd16s %1,%2,%0"
1171.1Snia	    : "=f"(out)
1181.1Snia	    : "f"(r1), "f"(r2));
1191.1Snia	return out;
1201.1Snia}
1211.1Snia
1221.1Snia_VISATTR
1231.1Sniastatic __inline vis_f32
1241.1Sniavis_fpsub16s(vis_f32 r1, vis_f32 r2)
1251.1Snia{
1261.1Snia	vis_f32 out;
1271.1Snia
1281.1Snia	__asm("fpsub16s %1,%2,%0"
1291.1Snia	    : "=f"(out)
1301.1Snia	    : "f"(r1), "f"(r2));
1311.1Snia	return out;
1321.1Snia}
1331.1Snia
1341.1Snia_VISATTR
1351.1Sniastatic __inline vis_f32
1361.1Sniavis_fpadd32s(vis_f32 r1, vis_f32 r2)
1371.1Snia{
1381.1Snia	vis_f32 out;
1391.1Snia
1401.1Snia	__asm("fpadd32s %1,%2,%0"
1411.1Snia	    : "=f"(out)
1421.1Snia	    : "f"(r1), "f"(r2));
1431.1Snia	return out;
1441.1Snia}
1451.1Snia
1461.1Snia_VISATTR
1471.1Sniastatic __inline vis_f32
1481.1Sniavis_fpsub32s(vis_f32 r1, vis_f32 r2)
1491.1Snia{
1501.1Snia	vis_f32 out;
1511.1Snia
1521.1Snia	__asm("fpsub32s %1,%2,%0"
1531.1Snia	    : "=f"(out)
1541.1Snia	    : "f"(r1), "f"(r2));
1551.1Snia	return out;
1561.1Snia}
1571.1Snia
1581.1Snia/* 4.7 Pixel formatting - packing */
1591.1Snia
1601.1Snia_VISATTR
1611.1Sniastatic __inline vis_f32
1621.1Sniavis_fpack16(vis_d64 r1)
1631.1Snia{
1641.1Snia	vis_f32 out;
1651.1Snia
1661.1Snia	__asm("fpack16 %1,%0"
1671.1Snia	    : "=f"(out)
1681.1Snia	    : "f"(r1));
1691.1Snia	return out;
1701.1Snia}
1711.1Snia
1721.1Snia_VISATTR
1731.1Sniastatic __inline vis_d64
1741.1Sniavis_fpack32(vis_d64 r1, vis_d64 r2)
1751.1Snia{
1761.1Snia	vis_d64 out;
1771.1Snia
1781.1Snia	__asm("fpack32 %1,%2,%0"
1791.1Snia	    : "=f"(out)
1801.1Snia	    : "f"(r1), "f"(r2));
1811.1Snia	return out;
1821.1Snia}
1831.1Snia
1841.1Snia_VISATTR
1851.1Sniastatic __inline vis_f32
1861.1Sniavis_fpackfix(vis_d64 r1)
1871.1Snia{
1881.1Snia	vis_f32 out;
1891.1Snia
1901.1Snia	__asm("fpackfix %1,%0"
1911.1Snia	    : "=f"(out)
1921.1Snia	    : "f"(r1));
1931.1Snia	return out;
1941.1Snia}
1951.1Snia
1961.1Snia_VISATTR
1971.1Sniastatic __inline vis_d64
1981.1Sniavis_fexpand(vis_f32 r1)
1991.1Snia{
2001.1Snia	vis_d64 out;
2011.1Snia
2021.1Snia	__asm("fexpand %1,%0"
2031.1Snia	    : "=f"(out)
2041.1Snia	    : "f"(r1));
2051.1Snia	return out;
2061.1Snia}
2071.1Snia
2081.1Snia_VISATTR
2091.1Sniastatic __inline vis_d64
2101.1Sniavis_fpmerge(vis_f32 r1, vis_f32 r2)
2111.1Snia{
2121.1Snia	vis_d64 out;
2131.1Snia
2141.1Snia	__asm("fpmerge %1,%2,%0"
2151.1Snia	    : "=f"(out)
2161.1Snia	    : "f"(r1), "f"(r2));
2171.1Snia	return out;
2181.1Snia}
2191.1Snia
2201.1Snia/* 4.7.6 Aligned address calculation */
2211.1Snia
2221.1Snia_VISATTR
2231.1Sniastatic __inline void *
2241.1Sniavis_alignaddr(void *addr, int offset)
2251.1Snia{
2261.1Snia#if defined(__VIS__) && defined(__GNUC__)
2271.1Snia	return __builtin_vis_alignaddr(addr, offset);
2281.1Snia#else
2291.1Snia	void *out;
2301.1Snia
2311.1Snia	__asm("alginaddr %1,%2,%0"
2321.1Snia	    : "=r"(out)
2331.1Snia	    : "r"(addr), "r"(offset));
2341.1Snia	return out;
2351.1Snia#endif
2361.1Snia}
2371.1Snia
2381.1Snia_VISATTR
2391.1Sniastatic __inline vis_d64
2401.1Sniavis_faligndata(vis_d64 hi, vis_d64 lo)
2411.1Snia{
2421.1Snia	vis_d64 out;
2431.1Snia
2441.1Snia	__asm("faligndata %1,%2,%0"
2451.1Snia	    : "=f"(out)
2461.1Snia	    : "f"(hi), "f"(lo));
2471.1Snia	return out;
2481.1Snia}
2491.1Snia
2501.1Snia/* 4.7.7 Edge handling */
2511.1Snia
2521.1Snia_VISATTR
2531.1Sniastatic __inline vis_s32
2541.1Sniavis_edge8(void *a1, void *a2)
2551.1Snia{
2561.1Snia#if defined(__VIS__) && defined(__GNUC__)
2571.1Snia	return __builtin_vis_edge8(a1, a2);
2581.1Snia#else
2591.1Snia	vis_s32 out;
2601.1Snia
2611.1Snia	__asm("edge8 %1,%2,%0"
2621.1Snia	    : "=r"(out)
2631.1Snia	    : "r"(a1), "f"(a2));
2641.1Snia	return out;
2651.1Snia#endif
2661.1Snia}
2671.1Snia
2681.1Snia_VISATTR
2691.1Sniastatic __inline vis_s32
2701.1Sniavis_edge16(void *a1, void *a2)
2711.1Snia{
2721.1Snia#if defined(__VIS__) && defined(__GNUC__)
2731.1Snia	return __builtin_vis_edge16(a1, a2);
2741.1Snia#else
2751.1Snia	vis_s32 out;
2761.1Snia
2771.1Snia	__asm("edge16 %1,%2,%0"
2781.1Snia	    : "=r"(out)
2791.1Snia	    : "r"(a1), "f"(a2));
2801.1Snia	return out;
2811.1Snia#endif
2821.1Snia}
2831.1Snia
2841.1Snia_VISATTR
2851.1Sniastatic __inline vis_s32
2861.1Sniavis_edge32(void *a1, void *a2)
2871.1Snia{
2881.1Snia#if defined(__VIS__) && defined(__GNUC__)
2891.1Snia	return __builtin_vis_edge32(a1, a2);
2901.1Snia#else
2911.1Snia	vis_s32 out;
2921.1Snia
2931.1Snia	__asm("edge32 %1,%2,%0"
2941.1Snia	    : "=r"(out)
2951.1Snia	    : "r"(a1), "f"(a2));
2961.1Snia	return out;
2971.1Snia#endif
2981.1Snia}
2991.1Snia
3001.1Snia_VISATTR
3011.1Sniastatic __inline vis_s32
3021.1Sniavis_edge8l(void *a1, void *a2)
3031.1Snia{
3041.1Snia#if defined(__VIS__) && defined(__GNUC__)
3051.1Snia	return __builtin_vis_edge8l(a1, a2);
3061.1Snia#else
3071.1Snia	vis_s32 out;
3081.1Snia
3091.1Snia	__asm("edge8l %1,%2,%0"
3101.1Snia	    : "=r"(out)
3111.1Snia	    : "r"(a1), "f"(a2));
3121.1Snia	return out;
3131.1Snia#endif
3141.1Snia}
3151.1Snia
3161.1Snia_VISATTR
3171.1Sniastatic __inline vis_s32
3181.1Sniavis_edge16l(void *a1, void *a2)
3191.1Snia{
3201.1Snia#if defined(__VIS__) && defined(__GNUC__)
3211.1Snia	return __builtin_vis_edge16l(a1, a2);
3221.1Snia#else
3231.1Snia	vis_s32 out;
3241.1Snia
3251.1Snia	__asm("edge16l %1,%2,%0"
3261.1Snia	    : "=r"(out)
3271.1Snia	    : "r"(a1), "f"(a2));
3281.1Snia	return out;
3291.1Snia#endif
3301.1Snia}
3311.1Snia
3321.1Snia_VISATTR
3331.1Sniastatic __inline vis_s32
3341.1Sniavis_edge32l(void *a1, void *a2)
3351.1Snia{
3361.1Snia#if defined(__VIS__) && defined(__GNUC__)
3371.1Snia	return __builtin_vis_edge32l(a1, a2);
3381.1Snia#else
3391.1Snia	vis_s32 out;
3401.1Snia
3411.1Snia	__asm("edge32l %1,%2,%0"
3421.1Snia	    : "=r"(out)
3431.1Snia	    : "r"(a1), "f"(a2));
3441.1Snia	return out;
3451.1Snia#endif
3461.1Snia}
3471.1Snia
3481.1Snia/* 4.9 Array coordinate translation */
3491.1Snia
3501.1Snia_VISATTR
3511.1Sniastatic __inline vis_addr
3521.1Snia_VISATTR
3531.1Sniavis_array8(vis_u64 d1, vis_s32 d2)
3541.1Snia{
3551.1Snia#if defined(__VIS__) && defined(__GNUC__)
3561.1Snia	return __builtin_vis_array8(d1, d2);
3571.1Snia#else
3581.1Snia	vis_addr out;
3591.1Snia
3601.1Snia	__asm("array8 %1,%2,%0"
3611.1Snia	    : "=r"(out)
3621.1Snia	    : "r"(d1), "f"(d2));
3631.1Snia	return out;
3641.1Snia#endif
3651.1Snia}
3661.1Snia
3671.1Snia_VISATTR
3681.1Sniastatic __inline vis_addr
3691.1Sniavis_array16(vis_u64 d1, vis_s32 d2)
3701.1Snia{
3711.1Snia#if defined(__VIS__) && defined(__GNUC__)
3721.1Snia	return __builtin_vis_array16(d1, d2);
3731.1Snia#else
3741.1Snia	vis_addr out;
3751.1Snia
3761.1Snia	__asm("array16 %1,%2,%0"
3771.1Snia	    : "=r"(out)
3781.1Snia	    : "r"(d1), "f"(d2));
3791.1Snia	return out;
3801.1Snia#endif
3811.1Snia}
3821.1Snia
3831.1Snia_VISATTR
3841.1Sniastatic __inline vis_addr
3851.1Sniavis_array32(vis_u64 d1, vis_s32 d2)
3861.1Snia{
3871.1Snia#if defined(__VIS__) && defined(__GNUC__)
3881.1Snia	return __builtin_vis_array32(d1, d2);
3891.1Snia#else
3901.1Snia	vis_addr out;
3911.1Snia
3921.1Snia	__asm("array32 %1,%2,%0"
3931.1Snia	    : "=r"(out)
3941.1Snia	    : "r"(d1), "r"(d2));
3951.1Snia	return out;
3961.1Snia#endif
3971.1Snia}
3981.1Snia
3991.1Snia/* 4.3.1 Graphics Status Register manipulation */
4001.1Snia
4011.1Snia_VISATTR
4021.1Sniastatic __inline vis_u64
4031.1Sniavis_read_gsr64(void)
4041.1Snia{
4051.1Snia#if defined(__VIS__) && defined(__GNUC__)
4061.1Snia	return __builtin_vis_read_gsr();
4071.1Snia#else
4081.1Snia	vis_u64 out;
4091.1Snia
4101.1Snia	__asm("rd %%gsr,%0"
4111.1Snia	    : "=r"(out));
4121.1Snia	return out;
4131.1Snia#endif
4141.1Snia}
4151.1Snia
4161.1Snia_VISATTR
4171.1Sniastatic __inline void
4181.1Sniavis_write_gsr64(vis_u64 gsr)
4191.1Snia{
4201.1Snia#if defined(__VIS__) && defined(__GNUC__)
4211.1Snia	__builtin_vis_write_gsr(gsr);
4221.1Snia#else
4231.1Snia	__asm("mov %0,%%gsr"
4241.1Snia	    :
4251.1Snia	    : "r"(gsr));
4261.1Snia#endif
4271.1Snia}
4281.1Snia
4291.1Snia_VISATTR
4301.1Sniastatic __inline vis_u32
4311.1Sniavis_read_gsr32(void)
4321.1Snia{
4331.1Snia	return vis_read_gsr64();
4341.1Snia}
4351.1Snia
4361.1Snia_VISATTR
4371.1Sniastatic __inline void
4381.1Sniavis_write_gsr32(vis_u32 gsr)
4391.1Snia{
4401.1Snia	vis_write_gsr64(gsr);
4411.1Snia}
4421.1Snia
4431.1Snia/* 4.3.2 Read and write to upper/lower components */
4441.1Snia
4451.1Snia_VISATTR
4461.1Sniastatic __inline vis_f32
4471.1Sniavis_read_hi(vis_d64 var)
4481.1Snia{
4491.1Snia	vis_u64 reg = *((vis_u64 *)&var);
4501.1Snia	vis_u32 hi = (reg >> 32) & 0xffffffff;
4511.1Snia	vis_f32 out = *((vis_f32 *)&hi);
4521.1Snia	return out;
4531.1Snia}
4541.1Snia
4551.1Snia_VISATTR
4561.1Sniastatic __inline vis_f32
4571.1Sniavis_read_lo(vis_d64 var)
4581.1Snia{
4591.1Snia	vis_u64 reg = *((vis_u64 *)&var);
4601.1Snia	vis_u32 lo = reg & 0xffffffff;
4611.1Snia	vis_f32 out = *((vis_f32 *)&lo);
4621.1Snia	return out;
4631.1Snia}
4641.1Snia
4651.1Snia_VISATTR
4661.1Sniastatic __inline vis_d64
4671.1Sniavis_write_lo(vis_d64 in, vis_f32 lower)
4681.1Snia{
4691.1Snia	vis_u64 out = *((vis_u64 *)&in);
4701.1Snia	vis_u32 hi = (out >> 32) & 0xffffffff;
4711.1Snia	vis_u32 lo = *((vis_u32 *)&lower);
4721.1Snia
4731.1Snia	out = ((vis_u64)hi << 32ULL) | lo;
4741.1Snia	return *((vis_d64 *)&out);
4751.1Snia}
4761.1Snia
4771.1Snia_VISATTR
4781.1Sniastatic __inline vis_d64
4791.1Sniavis_write_hi(vis_d64 in, vis_f32 upper)
4801.1Snia{
4811.1Snia	vis_u64 out = *((vis_u64 *)&in);
4821.1Snia	vis_u32 hi = *((vis_u32 *)&upper);
4831.1Snia	vis_u32 lo = out & 0xffffffff;
4841.1Snia
4851.1Snia	out = ((vis_u64)hi << 32ULL) | lo;
4861.1Snia	return *((vis_d64 *)&out);
4871.1Snia}
4881.1Snia
4891.1Snia/* 4.3.3 Join two variables into a single */
4901.1Snia
4911.1Snia_VISATTR
4921.1Sniastatic __inline vis_d64
4931.1Sniavis_freg_pair(vis_f32 f1, vis_f32 f2)
4941.1Snia{
4951.1Snia	vis_u64 out;
4961.1Snia	vis_u32 r1 = *((vis_u32 *)&f1);
4971.1Snia	vis_u32 r2 = *((vis_u32 *)&f2);
4981.1Snia
4991.1Snia	out = ((vis_u64)r1 << 32ULL) | r2;
5001.1Snia	return *((vis_d64 *)&out);
5011.1Snia}
5021.1Snia
5031.1Snia/* 4.3.4 Place ints into FP register */
5041.1Snia
5051.1Snia_VISATTR
5061.1Sniastatic __inline vis_f32
5071.1Sniavis_to_float(vis_u32 data)
5081.1Snia{
5091.1Snia	return *((vis_f32 *)&data);
5101.1Snia}
5111.1Snia
5121.1Snia_VISATTR
5131.1Sniastatic __inline vis_d64
5141.1Sniavis_to_double(vis_u32 d1, vis_u32 d2)
5151.1Snia{
5161.1Snia	vis_u64 out;
5171.1Snia
5181.1Snia	out = ((vis_u64)d1 << 32ULL) | d2;
5191.1Snia	return *((vis_d64 *)&out);
5201.1Snia}
5211.1Snia
5221.1Snia_VISATTR
5231.1Sniastatic __inline vis_d64
5241.1Sniavis_to_double_dup(vis_u32 data)
5251.1Snia{
5261.1Snia	return vis_to_double(data, data);
5271.1Snia}
5281.1Snia
5291.1Snia_VISATTR
5301.1Sniastatic __inline vis_d64
5311.1Sniavis_ll_to_double(vis_u64 data)
5321.1Snia{
5331.1Snia	return *((vis_d64 *)&data);
5341.1Snia}
5351.1Snia
5361.1Snia/* 4.6.2 Arithmetic - multiplication */
5371.1Snia
5381.1Snia_VISATTR
5391.1Sniastatic __inline vis_d64
5401.1Sniavis_fmul8x16(vis_f32 pixels, vis_d64 scale)
5411.1Snia{
5421.1Snia	vis_d64 out;
5431.1Snia
5441.1Snia	__asm("fmul8x16 %1,%2,%0"
5451.1Snia	    : "=f"(out)
5461.1Snia	    : "f"(pixels), "f"(scale));
5471.1Snia	return out;
5481.1Snia}
5491.1Snia
5501.1Snia_VISATTR
5511.1Sniastatic __inline vis_d64
5521.1Sniavis_fmul8x16au(vis_f32 pixels, vis_f32 scale)
5531.1Snia{
5541.1Snia	vis_d64 out;
5551.1Snia
5561.1Snia	__asm("fmul8x16au %1,%2,%0"
5571.1Snia	    : "=f"(out)
5581.1Snia	    : "f"(pixels), "f"(scale));
5591.1Snia	return out;
5601.1Snia}
5611.1Snia
5621.1Snia_VISATTR
5631.1Sniastatic __inline vis_d64
5641.1Sniavis_fmul8x16al(vis_f32 pixels, vis_f32 scale)
5651.1Snia{
5661.1Snia	vis_d64 out;
5671.1Snia
5681.1Snia	__asm("fmul8x16al %1,%2,%0"
5691.1Snia	    : "=f"(out)
5701.1Snia	    : "f"(pixels), "f"(scale));
5711.1Snia	return out;
5721.1Snia}
5731.1Snia
5741.1Snia_VISATTR
5751.1Sniastatic __inline vis_d64
5761.1Sniavis_fmul8sux16(vis_d64 d1, vis_d64 d2)
5771.1Snia{
5781.1Snia	vis_d64 out;
5791.1Snia
5801.1Snia	__asm("fmul8sux16 %1,%2,%0"
5811.1Snia	    : "=f"(out)
5821.1Snia	    : "f"(d1), "f"(d2));
5831.1Snia	return out;
5841.1Snia}
5851.1Snia
5861.1Snia_VISATTR
5871.1Sniastatic __inline vis_d64
5881.1Sniavis_fmul8ulx16(vis_d64 d1, vis_d64 d2)
5891.1Snia{
5901.1Snia	vis_d64 out;
5911.1Snia
5921.1Snia	__asm("fmul8ulx16 %1,%2,%0"
5931.1Snia	    : "=f"(out)
5941.1Snia	    : "f"(d1), "f"(d2));
5951.1Snia	return out;
5961.1Snia}
5971.1Snia
5981.1Snia_VISATTR
5991.1Sniastatic __inline vis_d64
6001.1Sniavis_fmuld8sux16(vis_f32 d1, vis_f32 d2)
6011.1Snia{
6021.1Snia	vis_d64 out;
6031.1Snia
6041.1Snia	__asm("fmuld8sux16 %1,%2,%0"
6051.1Snia	    : "=f"(out)
6061.1Snia	    : "f"(d1), "f"(d2));
6071.1Snia	return out;
6081.1Snia}
6091.1Snia
6101.1Snia_VISATTR
6111.1Sniastatic __inline vis_d64
6121.1Sniavis_fmuld8ulx16(vis_f32 d1, vis_f32 d2)
6131.1Snia{
6141.1Snia	vis_d64 out;
6151.1Snia
6161.1Snia	__asm("fmuld8ulx16 %1,%2,%0"
6171.1Snia	    : "=f"(out)
6181.1Snia	    : "f"(d1), "f"(d2));
6191.1Snia	return out;
6201.1Snia}
6211.1Snia
6221.1Snia/* 4.5 Pixel compare */
6231.1Snia
6241.1Snia_VISATTR
6251.1Sniastatic __inline int
6261.1Sniavis_fcmpgt16(vis_d64 d1, vis_d64 d2)
6271.1Snia{
6281.1Snia	int out;
6291.1Snia
6301.1Snia	__asm("fcmpgt16 %1,%2,%0"
6311.1Snia	    : "=r"(out)
6321.1Snia	    : "f"(d1), "f"(d2));
6331.1Snia	return out;
6341.1Snia}
6351.1Snia
6361.1Snia_VISATTR
6371.1Sniastatic __inline int
6381.1Sniavis_fcmple16(vis_d64 d1, vis_d64 d2)
6391.1Snia{
6401.1Snia	int out;
6411.1Snia
6421.1Snia	__asm("fcmple16 %1,%2,%0"
6431.1Snia	    : "=r"(out)
6441.1Snia	    : "f"(d1), "f"(d2));
6451.1Snia	return out;
6461.1Snia}
6471.1Snia
6481.1Snia_VISATTR
6491.1Sniastatic __inline int
6501.1Sniavis_fcmpeq16(vis_d64 d1, vis_d64 d2)
6511.1Snia{
6521.1Snia	int out;
6531.1Snia
6541.1Snia	__asm("fcmpeq16 %1,%2,%0"
6551.1Snia	    : "=r"(out)
6561.1Snia	    : "f"(d1), "f"(d2));
6571.1Snia	return out;
6581.1Snia}
6591.1Snia
6601.1Snia_VISATTR
6611.1Sniastatic __inline int
6621.1Sniavis_fcmpne16(vis_d64 d1, vis_d64 d2)
6631.1Snia{
6641.1Snia	int out;
6651.1Snia
6661.1Snia	__asm("fcmpne16 %1,%2,%0"
6671.1Snia	    : "=r"(out)
6681.1Snia	    : "f"(d1), "f"(d2));
6691.1Snia	return out;
6701.1Snia}
6711.1Snia
6721.1Snia_VISATTR
6731.1Sniastatic __inline int
6741.1Sniavis_fcmpgt32(vis_d64 d1, vis_d64 d2)
6751.1Snia{
6761.1Snia	int out;
6771.1Snia
6781.1Snia	__asm("fcmpgt32 %1,%2,%0"
6791.1Snia	    : "=r"(out)
6801.1Snia	    : "f"(d1), "f"(d2));
6811.1Snia	return out;
6821.1Snia}
6831.1Snia
6841.1Snia_VISATTR
6851.1Sniastatic __inline int
6861.1Sniavis_fcmple32(vis_d64 d1, vis_d64 d2)
6871.1Snia{
6881.1Snia	int out;
6891.1Snia
6901.1Snia	__asm("fcmple32 %1,%2,%0"
6911.1Snia	    : "=r"(out)
6921.1Snia	    : "f"(d1), "f"(d2));
6931.1Snia	return out;
6941.1Snia}
6951.1Snia
6961.1Snia_VISATTR
6971.1Sniastatic __inline int
6981.1Sniavis_fcmpeq32(vis_d64 d1, vis_d64 d2)
6991.1Snia{
7001.1Snia	int out;
7011.1Snia
7021.1Snia	__asm("fcmpeq32 %1,%2,%0"
7031.1Snia	    : "=r"(out)
7041.1Snia	    : "f"(d1), "f"(d2));
7051.1Snia	return out;
7061.1Snia}
7071.1Snia
7081.1Snia_VISATTR
7091.1Sniastatic __inline int
7101.1Sniavis_fcmpne32(vis_d64 d1, vis_d64 d2)
7111.1Snia{
7121.1Snia	int out;
7131.1Snia
7141.1Snia	__asm("fcmpne32 %1,%2,%0"
7151.1Snia	    : "=r"(out)
7161.1Snia	    : "f"(d1), "f"(d2));
7171.1Snia	return out;
7181.1Snia}
7191.1Snia
7201.1Snia_VISATTR
7211.1Sniastatic __inline int
7221.1Sniavis_fcmplt16(vis_d64 d1, vis_d64 d2)
7231.1Snia{
7241.1Snia	return vis_fcmpgt16(d2, d1);
7251.1Snia}
7261.1Snia
7271.1Snia_VISATTR
7281.1Sniastatic __inline int
7291.1Sniavis_fcmpge16(vis_d64 d1, vis_d64 d2)
7301.1Snia{
7311.1Snia	return vis_fcmple16(d2, d1);
7321.1Snia}
7331.1Snia
7341.1Snia_VISATTR
7351.1Sniastatic __inline int
7361.1Sniavis_fcmplt32(vis_d64 d1, vis_d64 d2)
7371.1Snia{
7381.1Snia	return vis_fcmpgt32(d2, d1);
7391.1Snia}
7401.1Snia
7411.1Snia_VISATTR
7421.1Sniastatic __inline int
7431.1Sniavis_fcmpge32(vis_d64 d1, vis_d64 d2)
7441.1Snia{
7451.1Snia	return vis_fcmple32(d2, d1);
7461.1Snia}
7471.1Snia
7481.1Snia/* 4.10 Pixel distance */
7491.1Snia
7501.1Snia_VISATTR
7511.1Sniastatic __inline vis_d64
7521.1Sniavis_pdist(vis_d64 pixels1, vis_d64 pixels2, vis_d64 acc)
7531.1Snia{
7541.1Snia	__asm("pdist %1,%2,%0"
7551.1Snia	    : "+f"(acc)
7561.1Snia	    : "f"(pixels1), "f"(pixels2));
7571.1Snia
7581.1Snia	return acc;
7591.1Snia}
7601.1Snia
7611.1Snia/* 4.4.1 Logical instructions - fill variables */
7621.1Snia
7631.1Snia_VISATTR
7641.1Sniastatic __inline vis_d64
7651.1Sniavis_fzero(void)
7661.1Snia{
7671.1Snia	vis_d64 out;
7681.1Snia
7691.1Snia	__asm("fzero %0"
7701.1Snia	    : "=f"(out));
7711.1Snia	return out;
7721.1Snia}
7731.1Snia
7741.1Snia_VISATTR
7751.1Sniastatic __inline vis_d64
7761.1Sniavis_fone(void)
7771.1Snia{
7781.1Snia	vis_d64 out;
7791.1Snia
7801.1Snia	__asm("fone %0"
7811.1Snia	    : "=f"(out));
7821.1Snia	return out;
7831.1Snia}
7841.1Snia
7851.1Snia_VISATTR
7861.1Sniastatic __inline vis_f32
7871.1Sniavis_fzeros(void)
7881.1Snia{
7891.1Snia	vis_f32 out;
7901.1Snia
7911.1Snia	__asm("fzeros %0"
7921.1Snia	    : "=f"(out));
7931.1Snia	return out;
7941.1Snia}
7951.1Snia
7961.1Snia_VISATTR
7971.1Sniastatic __inline vis_f32
7981.1Sniavis_fones(void)
7991.1Snia{
8001.1Snia	vis_f32 out;
8011.1Snia
8021.1Snia	__asm("fones %0"
8031.1Snia	    : "=f"(out));
8041.1Snia	return out;
8051.1Snia}
8061.1Snia
8071.1Snia/* 4.4.2 Logical instructions - copies and complements */
8081.1Snia
8091.1Snia_VISATTR
8101.1Sniastatic __inline vis_d64
8111.1Sniavis_fsrc(vis_d64 r1)
8121.1Snia{
8131.1Snia	vis_d64 out;
8141.1Snia
8151.1Snia	__asm("fsrc1 %1,%0"
8161.1Snia	    : "=f"(out)
8171.1Snia	    : "f"(r1));
8181.1Snia	return out;
8191.1Snia}
8201.1Snia
8211.1Snia_VISATTR
8221.1Sniastatic __inline vis_d64
8231.1Sniavis_fnot(vis_d64 r1)
8241.1Snia{
8251.1Snia	vis_d64 out;
8261.1Snia
8271.1Snia	__asm("fnot1 %1,%0"
8281.1Snia	    : "=f"(out)
8291.1Snia	    : "f"(r1));
8301.1Snia	return out;
8311.1Snia}
8321.1Snia
8331.1Snia_VISATTR
8341.1Sniastatic __inline vis_f32
8351.1Sniavis_fsrcs(vis_f32 r1)
8361.1Snia{
8371.1Snia	vis_f32 out;
8381.1Snia
8391.1Snia	__asm("fsrc1s %1,%0"
8401.1Snia	    : "=f"(out)
8411.1Snia	    : "f"(r1));
8421.1Snia	return out;
8431.1Snia}
8441.1Snia
8451.1Snia_VISATTR
8461.1Sniastatic __inline vis_f32
8471.1Sniavis_fnots(vis_f32 r1)
8481.1Snia{
8491.1Snia	vis_f32 out;
8501.1Snia
8511.1Snia	__asm("fnot1s %1,%0"
8521.1Snia	    : "=f"(out)
8531.1Snia	    : "f"(r1));
8541.1Snia	return out;
8551.1Snia}
8561.1Snia
/* 4.4.3 Logical instructions - bitwise */
8581.1Snia
8591.1Snia_VISATTR
8601.1Sniastatic __inline vis_d64
8611.1Sniavis_for(vis_d64 r1, vis_d64 r2)
8621.1Snia{
8631.1Snia	vis_d64 out;
8641.1Snia	__asm("for %1,%2,%0"
8651.1Snia	    : "=f"(out)
8661.1Snia	    : "f"(r1), "f"(r2));
8671.1Snia	return out;
8681.1Snia}
8691.1Snia
8701.1Snia_VISATTR
8711.1Sniastatic __inline vis_d64
8721.1Sniavis_fand(vis_d64 r1, vis_d64 r2)
8731.1Snia{
8741.1Snia	vis_d64 out;
8751.1Snia	__asm("fand %1,%2,%0"
8761.1Snia	    : "=f"(out)
8771.1Snia	    : "f"(r1), "f"(r2));
8781.1Snia	return out;
8791.1Snia}
8801.1Snia
8811.1Snia_VISATTR
8821.1Sniastatic __inline vis_d64
8831.1Sniavis_fxor(vis_d64 r1, vis_d64 r2)
8841.1Snia{
8851.1Snia	vis_d64 out;
8861.1Snia	__asm("fxor %1,%2,%0"
8871.1Snia	    : "=f"(out)
8881.1Snia	    : "f"(r1), "f"(r2));
8891.1Snia	return out;
8901.1Snia}
8911.1Snia
8921.1Snia_VISATTR
8931.1Sniastatic __inline vis_d64
8941.1Sniavis_fnor(vis_d64 r1, vis_d64 r2)
8951.1Snia{
8961.1Snia	vis_d64 out;
8971.1Snia	__asm("fnor %1,%2,%0"
8981.1Snia	    : "=f"(out)
8991.1Snia	    : "f"(r1), "f"(r2));
9001.1Snia	return out;
9011.1Snia}
9021.1Snia
9031.1Snia_VISATTR
9041.1Sniastatic __inline vis_d64
9051.1Sniavis_fnand(vis_d64 r1, vis_d64 r2)
9061.1Snia{
9071.1Snia	vis_d64 out;
9081.1Snia	__asm("fnand %1,%2,%0"
9091.1Snia	    : "=f"(out)
9101.1Snia	    : "f"(r1), "f"(r2));
9111.1Snia	return out;
9121.1Snia}
9131.1Snia
9141.1Snia_VISATTR
9151.1Sniastatic __inline vis_d64
9161.1Sniavis_fxnor(vis_d64 r1, vis_d64 r2)
9171.1Snia{
9181.1Snia	vis_d64 out;
9191.1Snia	__asm("fxnor %1,%2,%0"
9201.1Snia	    : "=f"(out)
9211.1Snia	    : "f"(r1), "f"(r2));
9221.1Snia	return out;
9231.1Snia}
9241.1Snia
9251.1Snia_VISATTR
9261.1Sniastatic __inline vis_d64
9271.1Sniavis_fornot(vis_d64 r1, vis_d64 r2)
9281.1Snia{
9291.1Snia	vis_d64 out;
9301.1Snia	__asm("fornot1 %1,%2,%0"
9311.1Snia	    : "=f"(out)
9321.1Snia	    : "f"(r1), "f"(r2));
9331.1Snia	return out;
9341.1Snia}
9351.1Snia
9361.1Snia_VISATTR
9371.1Sniastatic __inline vis_d64
9381.1Sniavis_fandnot(vis_d64 r1, vis_d64 r2)
9391.1Snia{
9401.1Snia	vis_d64 out;
9411.1Snia	__asm("fandnot1 %1,%2,%0"
9421.1Snia	    : "=f"(out)
9431.1Snia	    : "f"(r1), "f"(r2));
9441.1Snia	return out;
9451.1Snia}
9461.1Snia
9471.1Snia_VISATTR
9481.1Sniastatic __inline vis_f32
9491.1Sniavis_fors(vis_f32 r1, vis_f32 r2)
9501.1Snia{
9511.1Snia	vis_f32 out;
9521.1Snia	__asm("fors %1,%2,%0"
9531.1Snia	    : "=f"(out)
9541.1Snia	    : "f"(r1), "f"(r2));
9551.1Snia	return out;
9561.1Snia}
9571.1Snia
9581.1Snia_VISATTR
9591.1Sniastatic __inline vis_f32
9601.1Sniavis_fands(vis_f32 r1, vis_f32 r2)
9611.1Snia{
9621.1Snia	vis_f32 out;
9631.1Snia	__asm("fands %1,%2,%0"
9641.1Snia	    : "=f"(out)
9651.1Snia	    : "f"(r1), "f"(r2));
9661.1Snia	return out;
9671.1Snia}
9681.1Snia
9691.1Snia_VISATTR
9701.1Sniastatic __inline vis_f32
9711.1Sniavis_fxors(vis_f32 r1, vis_f32 r2)
9721.1Snia{
9731.1Snia	vis_f32 out;
9741.1Snia	__asm("fxors %1,%2,%0"
9751.1Snia	    : "=f"(out)
9761.1Snia	    : "f"(r1), "f"(r2));
9771.1Snia	return out;
9781.1Snia}
9791.1Snia
9801.1Snia_VISATTR
9811.1Sniastatic __inline vis_f32
9821.1Sniavis_fnors(vis_f32 r1, vis_f32 r2)
9831.1Snia{
9841.1Snia	vis_f32 out;
9851.1Snia	__asm("fnors %1,%2,%0"
9861.1Snia	    : "=f"(out)
9871.1Snia	    : "f"(r1), "f"(r2));
9881.1Snia	return out;
9891.1Snia}
9901.1Snia
9911.1Snia_VISATTR
9921.1Sniastatic __inline vis_f32
9931.1Sniavis_fnands(vis_f32 r1, vis_f32 r2)
9941.1Snia{
9951.1Snia	vis_f32 out;
9961.1Snia	__asm("fnands %1,%2,%0"
9971.1Snia	    : "=f"(out)
9981.1Snia	    : "f"(r1), "f"(r2));
9991.1Snia	return out;
10001.1Snia}
10011.1Snia
10021.1Snia_VISATTR
10031.1Sniastatic __inline vis_f32
10041.1Sniavis_fxnors(vis_f32 r1, vis_f32 r2)
10051.1Snia{
10061.1Snia	vis_f32 out;
10071.1Snia	__asm("fxnors %1,%2,%0"
10081.1Snia	    : "=f"(out)
10091.1Snia	    : "f"(r1), "f"(r2));
10101.1Snia	return out;
10111.1Snia}
10121.1Snia
10131.1Snia_VISATTR
10141.1Sniastatic __inline vis_f32
10151.1Sniavis_fornots(vis_f32 r1, vis_f32 r2)
10161.1Snia{
10171.1Snia	vis_f32 out;
10181.1Snia	__asm("fornot1s %1,%2,%0"
10191.1Snia	    : "=f"(out)
10201.1Snia	    : "f"(r1), "f"(r2));
10211.1Snia	return out;
10221.1Snia}
10231.1Snia
10241.1Snia_VISATTR
10251.1Sniastatic __inline vis_f32
10261.1Sniavis_fandnots(vis_f32 r1, vis_f32 r2)
10271.1Snia{
10281.1Snia	vis_f32 out;
10291.1Snia	__asm("fandnot1s %1,%2,%0"
10301.1Snia	    : "=f"(out)
10311.1Snia	    : "f"(r1), "f"(r2));
10321.1Snia	return out;
10331.1Snia}
10341.1Snia
10351.1Snia/* 4.8.1 Partial Stores */
10361.1Snia
10371.1Snia_VISATTR
10381.1Sniastatic __inline void
10391.1Sniavis_pst_8(vis_d64 data, void *addr, vis_u8 mask)
10401.1Snia{
10411.1Snia	__asm("stda %1,[%0]%2,0xc0"
10421.1Snia	    : "=r"(addr)
10431.1Snia	    : "f"(data), "r"(mask));
10441.1Snia}
10451.1Snia
10461.1Snia_VISATTR
10471.1Sniastatic __inline void
10481.1Sniavis_pst_16(vis_d64 data, void *addr, vis_u8 mask)
10491.1Snia{
10501.1Snia	__asm("stda %1,[%0]%2,0xc2"
10511.1Snia	    : "=r"(addr)
10521.1Snia	    : "f"(data), "r"(mask));
10531.1Snia}
10541.1Snia
10551.1Snia_VISATTR
10561.1Sniastatic __inline void
10571.1Sniavis_pst_32(vis_d64 data, void *addr, vis_u8 mask)
10581.1Snia{
10591.1Snia	__asm("stda %1,[%0]%2,0xc4"
10601.1Snia	    : "=r"(addr)
10611.1Snia	    : "f"(data), "r"(mask));
10621.1Snia}
10631.1Snia
10641.1Snia/* 4.8.2 Byte/Short Loads and Stores */
10651.1Snia
10661.1Snia_VISATTR
10671.1Sniastatic __inline void
10681.1Sniavis_st_u8(vis_u64 data, void *addr)
10691.1Snia{
10701.1Snia	__asm("stda %1,[%0]0xd0"
10711.1Snia	    : "=r"(addr)
10721.1Snia	    : "f"(data));
10731.1Snia}
10741.1Snia
10751.1Snia_VISATTR
10761.1Sniastatic __inline void
10771.1Sniavis_st_u8_le(vis_d64 data, void *addr)
10781.1Snia{
10791.1Snia	__asm("stda %1,[%0]0xd8"
10801.1Snia	    : "=r"(addr)
10811.1Snia	    : "f"(data));
10821.1Snia}
10831.1Snia
10841.1Snia_VISATTR
10851.1Sniastatic __inline void
10861.1Sniavis_st_u16(vis_d64 data, void *addr)
10871.1Snia{
10881.1Snia	__asm("stda %1,[%0]0xd2"
10891.1Snia	    : "=r"(addr)
10901.1Snia	    : "f"(data));
10911.1Snia}
10921.1Snia
10931.1Snia_VISATTR
10941.1Sniastatic __inline void
10951.1Sniavis_st_u16_le(vis_d64 data, void *addr)
10961.1Snia{
10971.1Snia	__asm("stda %1,[%0]0xda"
10981.1Snia	    : "=r"(addr)
10991.1Snia	    : "f"(data));
11001.1Snia}
11011.1Snia
11021.1Snia_VISATTR
11031.1Sniastatic __inline void
11041.1Sniavis_st_u8_i(vis_d64 data, void *addr, long idx)
11051.1Snia{
11061.1Snia	vis_u8 *ptr = addr;
11071.1Snia	vis_st_u8(data, ptr + idx);
11081.1Snia}
11091.1Snia
11101.1Snia_VISATTR
11111.1Sniastatic __inline void
11121.1Sniavis_st_u16_i(vis_d64 data, void *addr, long idx)
11131.1Snia{
11141.1Snia	vis_u8 *ptr = addr;
11151.1Snia	vis_st_u16(data, ptr + idx);
11161.1Snia}
11171.1Snia
11181.1Snia_VISATTR
11191.1Sniastatic __inline vis_d64
11201.1Sniavis_ld_u8(void *addr)
11211.1Snia{
11221.1Snia	vis_u8 val;
11231.1Snia	vis_d64 out;
11241.1Snia
11251.1Snia	val = *((vis_u8 *)addr);
11261.1Snia	*((vis_u8 *)&out) = val;
11271.1Snia
11281.1Snia	return out;
11291.1Snia}
11301.1Snia
11311.1Snia_VISATTR
11321.1Sniastatic __inline vis_d64
11331.1Sniavis_ld_u16(void *addr)
11341.1Snia{
11351.1Snia	vis_u16 val;
11361.1Snia	vis_d64 out;
11371.1Snia
11381.1Snia	val = *((vis_u16 *)addr);
11391.1Snia	*((vis_u16 *)&out) = val;
11401.1Snia
11411.1Snia	return out;
11421.1Snia}
11431.1Snia
11441.1Snia_VISATTR
11451.1Sniastatic __inline vis_d64
11461.1Sniavis_ld_u8_i(void *addr, long idx)
11471.1Snia{
11481.1Snia	vis_u8 *ptr = addr;
11491.1Snia	return vis_ld_u8(ptr + idx);
11501.1Snia}
11511.1Snia
11521.1Snia_VISATTR
11531.1Sniastatic __inline vis_d64
11541.1Sniavis_ld_u16_i(void *addr, long idx)
11551.1Snia{
11561.1Snia	vis_u8 *ptr = addr;
11571.1Snia	return vis_ld_u16(ptr + idx);
11581.1Snia}
11591.1Snia
11601.1Snia/*
11611.1Snia * VIS 2.0 instructions
11621.1Snia */
11631.1Snia
11641.1Snia_VISATTR
11651.1Sniastatic __inline vis_u32
11661.1Sniavis_read_bmask(void)
11671.1Snia{
11681.1Snia	vis_u32 out;
11691.1Snia
11701.1Snia	__asm("rd %%gsr,%0"
11711.1Snia	    "srlx %0,32,%0"
11721.1Snia	    : "+f"(out));
11731.1Snia	return out;
11741.1Snia}
11751.1Snia
11761.1Snia_VISATTR
11771.1Sniastatic __inline void
11781.1Sniavis_write_bmask(vis_u32 mask1, vis_u32 mask2)
11791.1Snia{
11801.1Snia#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
11811.1Snia	(void)__builtin_vis_bmask(mask1, mask2);
11821.1Snia#else
11831.1Snia	vis_u32 out;
11841.1Snia
11851.1Snia	__asm("bmask %1,%2,%0"
11861.1Snia	    : "=r"(out)
11871.1Snia	    : "r"(mask1), "r"(mask2));
11881.1Snia
11891.1Snia	(void)out;
11901.1Snia#endif
11911.1Snia}
11921.1Snia
11931.1Snia_VISATTR
11941.1Sniastatic __inline vis_d64
11951.1Sniavis_bshuffle(vis_d64 pixels1, vis_d64 pixels2)
11961.1Snia{
11971.1Snia	vis_d64 out;
11981.1Snia
11991.1Snia	__asm("bshuffle %1,%2,%0"
12001.1Snia	    : "=f"(out)
12011.1Snia	    : "f"(pixels1), "f"(pixels2));
12021.1Snia	return out;
12031.1Snia}
12041.1Snia
12051.1Snia_VISATTR
12061.1Sniastatic __inline vis_s32
12071.1Sniavis_edge8n(void *a1, void *a2)
12081.1Snia{
12091.1Snia#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
12101.1Snia	return __builtin_vis_edge8n(a1, a2);
12111.1Snia#else
12121.1Snia	vis_s32 out;
12131.1Snia
12141.1Snia	__asm("edge8n %1,%2,%0"
12151.1Snia	    : "=r"(out)
12161.1Snia	    : "r"(a1), "r"(a2));
12171.1Snia	return out;
12181.1Snia#endif
12191.1Snia}
12201.1Snia
12211.1Snia_VISATTR
12221.1Sniastatic __inline vis_s32
12231.1Sniavis_edge16n(void *a1, void *a2)
12241.1Snia{
12251.1Snia#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
12261.1Snia	return __builtin_vis_edge16n(a1, a2);
12271.1Snia#else
12281.1Snia	vis_s32 out;
12291.1Snia
12301.1Snia	__asm("edge16n %1,%2,%0"
12311.1Snia	    : "=r"(out)
12321.1Snia	    : "r"(a1), "r"(a2));
12331.1Snia	return out;
12341.1Snia#endif
12351.1Snia}
12361.1Snia
12371.1Snia_VISATTR
12381.1Sniastatic __inline vis_s32
12391.1Sniavis_edge32n(void *a1, void *a2)
12401.1Snia{
12411.1Snia#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
12421.1Snia	return __builtin_vis_edge32n(a1, a2);
12431.1Snia#else
12441.1Snia	vis_s32 out;
12451.1Snia
12461.1Snia	__asm("edge32n %1,%2,%0"
12471.1Snia	    : "=r"(out)
12481.1Snia	    : "r"(a1), "r"(a2));
12491.1Snia	return out;
12501.1Snia#endif
12511.1Snia}
12521.1Snia
12531.1Snia_VISATTR
12541.1Sniastatic __inline vis_s32
12551.1Sniavis_edge8ln(void *a1, void *a2)
12561.1Snia{
12571.1Snia#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
12581.1Snia	return __builtin_vis_edge8ln(a1, a2);
12591.1Snia#else
12601.1Snia	vis_s32 out;
12611.1Snia
12621.1Snia	__asm("edge8ln %1,%2,%0"
12631.1Snia	    : "=r"(out)
12641.1Snia	    : "r"(a1), "r"(a2));
12651.1Snia	return out;
12661.1Snia#endif
12671.1Snia}
12681.1Snia
12691.1Snia_VISATTR
12701.1Sniastatic __inline vis_s32
12711.1Sniavis_edge16ln(void *a1, void *a2)
12721.1Snia{
12731.1Snia#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
12741.1Snia	return __builtin_vis_edge16ln(a1, a2);
12751.1Snia#else
12761.1Snia	vis_s32 out;
12771.1Snia
12781.1Snia	__asm("edge16ln %1,%2,%0"
12791.1Snia	    : "=r"(out)
12801.1Snia	    : "r"(a1), "r"(a2));
12811.1Snia	return out;
12821.1Snia#endif
12831.1Snia}
12841.1Snia
12851.1Snia_VISATTR
12861.1Sniastatic __inline vis_s32
12871.1Sniavis_edge32ln(void *a1, void *a2)
12881.1Snia{
12891.1Snia#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__)
12901.1Snia	return __builtin_vis_edge32ln(a1, a2);
12911.1Snia#else
12921.1Snia	vis_s32 out;
12931.1Snia
12941.1Snia	__asm("edge32ln %1,%2,%0"
12951.1Snia	    : "=r"(out)
12961.1Snia	    : "r"(a1), "r"(a2));
12971.1Snia	return out;
12981.1Snia#endif
12991.1Snia}
13001.1Snia
13011.1Snia#ifdef __cplusplus
13021.1Snia}
13031.1Snia#endif
13041.1Snia
13051.1Snia#endif
1306