1848b8605Smrg/*
2848b8605Smrg * Clip testing in SPARC assembly
3848b8605Smrg */
4848b8605Smrg
/*
 * Byte offsets of the GLvector4f fields touched by this file, plus LDPTR,
 * the load instruction used for the pointer-sized start field.  The layout
 * differs between 64-bit (__arch64__) and 32-bit builds.
 * NOTE(review): the offsets must agree with the C definition of
 * GLvector4f, which is not visible from this file.
 */
#define V4F_DATA	0x00		/* identical in both layouts */

#if __arch64__
#define LDPTR		ldx
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif

/* Values ORed into the flags field: one low bit per vector component. */
#define VEC_SIZE_1	0x1
#define VEC_SIZE_2	0x3
#define VEC_SIZE_3	0x7
#define VEC_SIZE_4	0xf
27848b8605Smrg
        /* %g2 and %g3 are used as temporaries by the routines below. */
        .register %g2, #scratch
        .register %g3, #scratch

	.text
	.align		64

/* Single precision 1.0.  The clip test entry points find this word
 * PC-relatively (call __pc_tramp, then subtract from %o7) and add 4 to
 * the result to reach clip_table, so clip_table has to stay directly
 * behind this word.
 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 */
/* clip_table: 128 one-byte clip masks, 16 rows of 8.
 *
 * The index is the 7-bit value the loops build in %g3 from the raw
 * float bit patterns of a point, one carry bit at a time.  From the
 * most significant bit down:
 *
 *   bit 6  sign bit of w
 *   bit 5  sign bit of z
 *   bit 4  magnitude bits of w below those of z (unsigned compare)
 *   bit 3  sign bit of y
 *   bit 2  magnitude bits of w below those of y
 *   bit 1  sign bit of x
 *   bit 0  magnitude bits of w below those of x
 *
 * The byte found is stored to clipMask[] as is; 0 means the point is
 * not clipped.  Entries only use bits 0..5 (presumably the Mesa
 * CLIP_*_BIT plane flags -- confirm against the C headers).
 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26
59848b8605Smrg
60848b8605Smrg/* GLvector4f *clip_vec, GLvector4f *proj_vec,
61848b8605Smrg   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
62848b8605Smrg   GLboolean viewport_z_enable */
63848b8605Smrg
	.align		64
/* Trampoline for position-independent addressing.  It returns at once;
 * its only purpose is to be the target of a call, because the call
 * instruction leaves its own address in %o7.  The callers below turn
 * that address into the address of one_dot_zero in the delay slot of
 * the call.
 */
__pc_tramp:
	retl
	 nop
68848b8605Smrg
69848b8605Smrg	.globl		_mesa_sparc_cliptest_points4
70848b8605Smrg_mesa_sparc_cliptest_points4:
71848b8605Smrg	save		%sp, -64, %sp
72848b8605Smrg	call		__pc_tramp
73848b8605Smrg	 sub		%o7, (. - one_dot_zero - 4), %g1
74848b8605Smrg	ld		[%g1 + 0x0], %f4
75848b8605Smrg	add		%g1, 0x4, %g1
76848b8605Smrg
77848b8605Smrg	ld		[%i0 + V4F_STRIDE], %l1
78848b8605Smrg	ld		[%i0 + V4F_COUNT], %l3
79848b8605Smrg	LDPTR		[%i0 + V4F_START], %i0
80848b8605Smrg	LDPTR		[%i1 + V4F_START], %i5
81848b8605Smrg	ldub		[%i3], %g2
82848b8605Smrg	ldub		[%i4], %g3
83848b8605Smrg	sll		%g3, 8, %g3
84848b8605Smrg	or		%g2, %g3, %g2
85848b8605Smrg
86848b8605Smrg	ld		[%i1 + V4F_FLAGS], %g3
87848b8605Smrg	or		%g3, VEC_SIZE_4, %g3
88848b8605Smrg	st		%g3, [%i1 + V4F_FLAGS]
89848b8605Smrg	mov		3, %g3
90848b8605Smrg	st		%g3, [%i1 + V4F_SIZE]
91848b8605Smrg	st		%l3, [%i1 + V4F_COUNT]
92848b8605Smrg	clr		%l2
93848b8605Smrg	clr		%l0
94848b8605Smrg
95848b8605Smrg	/* l0:	i
96848b8605Smrg	 * l3:	count
97848b8605Smrg	 * l1:	stride
98848b8605Smrg	 * l2:	c
99848b8605Smrg	 * g2:	(tmpAndMask << 8) | tmpOrMask
100848b8605Smrg	 * g1:	clip_table
101848b8605Smrg	 * i0:	from[stride][i]
102848b8605Smrg	 * i2:	clipMask
103848b8605Smrg	 * i5:	vProj[4][i]
104848b8605Smrg	 */
105848b8605Smrg
106848b8605Smrg1:	ld		[%i0 + 0x0c], %f3	! LSU	Group
107848b8605Smrg	ld		[%i0 + 0x0c], %g5	! LSU	Group
108848b8605Smrg	ld		[%i0 + 0x08], %g4	! LSU	Group
109848b8605Smrg	fdivs		%f4, %f3, %f8		! FGM
110848b8605Smrg	addcc		%g5, %g5, %g5		! IEU1	Group
111848b8605Smrg	addx		%g0, 0x0, %g3		! IEU1	Group
112848b8605Smrg	addcc		%g4, %g4, %g4		! IEU1	Group
113848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
114848b8605Smrg	subcc		%g5, %g4, %g0		! IEU1	Group
115848b8605Smrg	ld		[%i0 + 0x04], %g4	! LSU	Group
116848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
117848b8605Smrg	addcc		%g4, %g4, %g4		! IEU1	Group
118848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
119848b8605Smrg	subcc		%g5, %g4, %g0		! IEU1	Group
120848b8605Smrg	ld		[%i0 + 0x00], %g4	! LSU	Group
121848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
122848b8605Smrg	addcc		%g4, %g4, %g4		! IEU1	Group
123848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
124848b8605Smrg	subcc		%g5, %g4, %g0		! IEU1	Group
125848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
126848b8605Smrg	ldub		[%g1 + %g3], %g3	! LSU	Group
127848b8605Smrg	cmp		%g3, 0			! IEU1	Group, stall
128848b8605Smrg	be		2f			! CTI
129848b8605Smrg	 stb		%g3, [%i2]		! LSU
130848b8605Smrg	sll		%g3, 8, %g4		! IEU1	Group
131848b8605Smrg	add		%l2, 1, %l2		! IEU0
132848b8605Smrg	st		%g0, [%i5 + 0x00]	! LSU
133848b8605Smrg	or		%g4, 0xff, %g4		! IEU0	Group
134848b8605Smrg	or		%g2, %g3, %g2		! IEU1
135848b8605Smrg	st		%g0, [%i5 + 0x04]	! LSU
136848b8605Smrg	and		%g2, %g4, %g2		! IEU0	Group
137848b8605Smrg	st		%g0, [%i5 + 0x08]	! LSU
138848b8605Smrg	b		3f			! CTI
139848b8605Smrg	 st		%f4, [%i5 + 0x0c]	! LSU	Group
140848b8605Smrg2:	ld		[%i0 + 0x00], %f0	! LSU	Group
141848b8605Smrg	ld		[%i0 + 0x04], %f1	! LSU	Group
142848b8605Smrg	ld		[%i0 + 0x08], %f2	! LSU	Group
143848b8605Smrg	fmuls		%f0, %f8, %f0		! FGM
144848b8605Smrg	st		%f0, [%i5 + 0x00]	! LSU	Group
145848b8605Smrg	fmuls		%f1, %f8, %f1		! FGM
146848b8605Smrg	st		%f1, [%i5 + 0x04]	! LSU	Group
147848b8605Smrg	fmuls		%f2, %f8, %f2		! FGM
148848b8605Smrg	st		%f2, [%i5 + 0x08]	! LSU	Group
149848b8605Smrg	st		%f8, [%i5 + 0x0c]	! LSU	Group
150848b8605Smrg3:	add		%i5, 0x10, %i5		! IEU1
151848b8605Smrg	add		%l0, 1, %l0		! IEU0	Group
152848b8605Smrg	add		%i2, 1, %i2		! IEU0	Group
153848b8605Smrg	cmp		%l0, %l3		! IEU1	Group
154848b8605Smrg	bne		1b			! CTI
155848b8605Smrg	 add		%i0, %l1, %i0		! IEU0	Group
156848b8605Smrg	stb		%g2, [%i3]		! LSU
157848b8605Smrg	srl		%g2, 8, %g3		! IEU0	Group
158848b8605Smrg	cmp		%l2, %l3		! IEU1	Group
159848b8605Smrg	bl,a		1f			! CTI
160848b8605Smrg	 clr		%g3			! IEU0
161848b8605Smrg1:	stb		%g3, [%i4]		! LSU	Group
162848b8605Smrg	ret					! CTI	Group
163848b8605Smrg	 restore	%i1, 0x0, %o0
164848b8605Smrg
165848b8605Smrg	.globl		_mesa_sparc_cliptest_points4_np
166848b8605Smrg_mesa_sparc_cliptest_points4_np:
167848b8605Smrg	save		%sp, -64, %sp
168848b8605Smrg
169848b8605Smrg	call		__pc_tramp
170848b8605Smrg	 sub		%o7, (. - one_dot_zero - 4), %g1
171848b8605Smrg	add		%g1, 0x4, %g1
172848b8605Smrg
173848b8605Smrg	ld		[%i0 + V4F_STRIDE], %l1
174848b8605Smrg	ld		[%i0 + V4F_COUNT], %l3
175848b8605Smrg	LDPTR		[%i0 + V4F_START], %i0
176848b8605Smrg	ldub		[%i3], %g2
177848b8605Smrg	ldub		[%i4], %g3
178848b8605Smrg	sll		%g3, 8, %g3
179848b8605Smrg	or		%g2, %g3, %g2
180848b8605Smrg
181848b8605Smrg	clr		%l2
182848b8605Smrg	clr		%l0
183848b8605Smrg
184848b8605Smrg	/* l0:	i
185848b8605Smrg	 * l3:	count
186848b8605Smrg	 * l1:	stride
187848b8605Smrg	 * l2:	c
188848b8605Smrg	 * g2:	(tmpAndMask << 8) | tmpOrMask
189848b8605Smrg	 * g1:	clip_table
190848b8605Smrg	 * i0:	from[stride][i]
191848b8605Smrg	 * i2:	clipMask
192848b8605Smrg	 */
193848b8605Smrg
194848b8605Smrg1:	ld		[%i0 + 0x0c], %g5	! LSU	Group
195848b8605Smrg	ld		[%i0 + 0x08], %g4	! LSU	Group
196848b8605Smrg	addcc		%g5, %g5, %g5		! IEU1	Group
197848b8605Smrg	addx		%g0, 0x0, %g3		! IEU1	Group
198848b8605Smrg	addcc		%g4, %g4, %g4		! IEU1	Group
199848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
200848b8605Smrg	subcc		%g5, %g4, %g0		! IEU1	Group
201848b8605Smrg	ld		[%i0 + 0x04], %g4	! LSU	Group
202848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
203848b8605Smrg	addcc		%g4, %g4, %g4		! IEU1	Group
204848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
205848b8605Smrg	subcc		%g5, %g4, %g0		! IEU1	Group
206848b8605Smrg	ld		[%i0 + 0x00], %g4	! LSU	Group
207848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
208848b8605Smrg	addcc		%g4, %g4, %g4		! IEU1	Group
209848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
210848b8605Smrg	subcc		%g5, %g4, %g0		! IEU1	Group
211848b8605Smrg	addx		%g3, %g3, %g3		! IEU1	Group
212848b8605Smrg	ldub		[%g1 + %g3], %g3	! LSU	Group
213848b8605Smrg	cmp		%g3, 0			! IEU1	Group, stall
214848b8605Smrg	be		2f			! CTI
215848b8605Smrg	 stb		%g3, [%i2]		! LSU
216848b8605Smrg	sll		%g3, 8, %g4		! IEU1	Group
217848b8605Smrg	add		%l2, 1, %l2		! IEU0
218848b8605Smrg	or		%g4, 0xff, %g4		! IEU0	Group
219848b8605Smrg	or		%g2, %g3, %g2		! IEU1
220848b8605Smrg	and		%g2, %g4, %g2		! IEU0	Group
221848b8605Smrg2:	add		%l0, 1, %l0		! IEU0	Group
222848b8605Smrg	add		%i2, 1, %i2		! IEU0	Group
223848b8605Smrg	cmp		%l0, %l3		! IEU1	Group
224848b8605Smrg	bne		1b			! CTI
225848b8605Smrg	 add		%i0, %l1, %i0		! IEU0	Group
226848b8605Smrg	stb		%g2, [%i3]		! LSU
227848b8605Smrg	srl		%g2, 8, %g3		! IEU0	Group
228848b8605Smrg	cmp		%l2, %l3		! IEU1	Group
229848b8605Smrg	bl,a		1f			! CTI
230848b8605Smrg	 clr		%g3			! IEU0
231848b8605Smrg1:	stb		%g3, [%i4]		! LSU	Group
232848b8605Smrg	ret					! CTI	Group
233848b8605Smrg	 restore	%i1, 0x0, %o0
234