1af69d88dSmrg/*
2af69d88dSmrg * Clip testing in SPARC assembly
3af69d88dSmrg */
4af69d88dSmrg
5af69d88dSmrg#if __arch64__
6af69d88dSmrg#define LDPTR		ldx
77ec681f3Smrg#define MATH_ASM_PTR_SIZE 8
87ec681f3Smrg#include "math/m_vector_asm.h"
9af69d88dSmrg#else
10af69d88dSmrg#define LDPTR		ld
117ec681f3Smrg#define MATH_ASM_PTR_SIZE 4
127ec681f3Smrg#include "math/m_vector_asm.h"
13af69d88dSmrg#endif
14af69d88dSmrg
15af69d88dSmrg        .register %g2, #scratch
16af69d88dSmrg        .register %g3, #scratch
17af69d88dSmrg
18af69d88dSmrg	.text
19af69d88dSmrg	.align		64
20af69d88dSmrg
/*
 * Bit pattern of the single-precision constant 1.0.
 * _mesa_sparc_cliptest_points4 loads it into %f4 and uses it both as the
 * dividend for 1/w and as the w written for clipped vertices.
 *
 * Layout constraint: both cliptest routines obtain the address of
 * clip_table by adding 4 to the address of this word, so nothing may be
 * placed between this word and clip_table.
 */
21af69d88dSmrgone_dot_zero:
22af69d88dSmrg	.word		0x3f800000	/* 1.0f */
23af69d88dSmrg
24af69d88dSmrg	/* This trick is shamelessly stolen from the x86
25af69d88dSmrg	 * Mesa asm.  Very clever, and we can do it too
26af69d88dSmrg	 * since we have the necessary add with carry
27af69d88dSmrg	 * instructions on Sparc.
28af69d88dSmrg	 */
/*
 * 128-entry byte table mapping a 7-bit comparison index to a clip mask.
 *
 * The cliptest loops build the index one bit at a time from the raw
 * 32-bit patterns of a vertex (x, y, z, w), shifting each new bit in at
 * the bottom with add-with-carry.  From most to least significant bit:
 *
 *   bit 6: sign bit of w
 *   bit 5: sign bit of z
 *   bit 4: (bits(w) << 1) < (bits(z) << 1), unsigned
 *   bit 3: sign bit of y
 *   bit 2: (bits(w) << 1) < (bits(y) << 1), unsigned
 *   bit 1: sign bit of x
 *   bit 0: (bits(w) << 1) < (bits(x) << 1), unsigned
 *
 * Each row below holds 8 consecutive entries (index bits 2..0 vary within
 * a row); the first 8 rows are for w with the sign bit clear, the last 8
 * for w with the sign bit set.  An entry of 0 means the vertex is treated
 * as unclipped by the callers.
 *
 * Must directly follow one_dot_zero (callers use one_dot_zero + 4).
 */
29af69d88dSmrgclip_table:
30af69d88dSmrg	.byte	 0,  1,  0,  2,  4,  5,  4,  6
31af69d88dSmrg	.byte	 0,  1,  0,  2,  8,  9,  8, 10
32af69d88dSmrg	.byte	32, 33, 32, 34, 36, 37, 36, 38
33af69d88dSmrg	.byte	32, 33, 32, 34, 40, 41, 40, 42
34af69d88dSmrg	.byte	 0,  1,  0,  2,  4,  5,  4,  6
35af69d88dSmrg	.byte	 0,  1,  0,  2,  8,  9,  8, 10
36af69d88dSmrg	.byte	16, 17, 16, 18, 20, 21, 20, 22
37af69d88dSmrg	.byte	16, 17, 16, 18, 24, 25, 24, 26
38af69d88dSmrg	.byte	63, 61, 63, 62, 55, 53, 55, 54
39af69d88dSmrg	.byte	63, 61, 63, 62, 59, 57, 59, 58
40af69d88dSmrg	.byte	47, 45, 47, 46, 39, 37, 39, 38
41af69d88dSmrg	.byte	47, 45, 47, 46, 43, 41, 43, 42
42af69d88dSmrg	.byte	63, 61, 63, 62, 55, 53, 55, 54
43af69d88dSmrg	.byte	63, 61, 63, 62, 59, 57, 59, 58
44af69d88dSmrg	.byte	31, 29, 31, 30, 23, 21, 23, 22
45af69d88dSmrg	.byte	31, 29, 31, 30, 27, 25, 27, 26
46af69d88dSmrg
47af69d88dSmrg/* GLvector4f *clip_vec, GLvector4f *proj_vec,
48af69d88dSmrg   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
49af69d88dSmrg   GLboolean viewport_z_enable */
50af69d88dSmrg
51af69d88dSmrg	.align		64
/*
 * Helper that returns immediately to its caller.
 * The cliptest routines "call" it only for the side effect of the call
 * instruction, which leaves the address of the call itself in %o7; the
 * delay-slot instruction at each call site then subtracts a link-time
 * constant from %o7 to get the address of one_dot_zero without needing
 * an absolute relocation.
 */
52af69d88dSmrg__pc_tramp:
53af69d88dSmrg	retl
54af69d88dSmrg	 nop
55af69d88dSmrg
/*
 * GLvector4f *_mesa_sparc_cliptest_points4(GLvector4f *clip_vec,
 *                                          GLvector4f *proj_vec,
 *                                          GLubyte clipMask[],
 *                                          GLubyte *orMask,
 *                                          GLubyte *andMask,
 *                                          GLboolean viewport_z_enable)
 *
 * Clip-tests every 4-component vertex of clip_vec and writes the
 * perspective-divided result to proj_vec.
 *
 * In:   %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *       %i3 = orMask,   %i4 = andMask,  %i5 = viewport_z_enable
 * Out:  %o0 = proj_vec (after restore)
 *
 * For each vertex i:
 *   - a 7-bit index is built from the sign bits of w, z, y, x and from
 *     unsigned compares of (bits << 1) of w against z, y and x, and
 *     clip_table[index] is stored to clipMask[i];
 *   - mask == 0: proj[i] = (x * r, y * r, z * r, r) with r = 1.0 / w;
 *   - mask != 0: proj[i] = (0, 0, 0, 1.0), the mask is ORed into the
 *     running or-mask and ANDed into the running and-mask, and the
 *     clipped-vertex counter c is incremented.
 * Afterwards *orMask receives the or-mask and *andMask receives the
 * and-mask if every vertex was clipped (c == count), otherwise 0.
 * proj_vec gets flag VEC_SIZE_4 ORed in, V4F_SIZE = 3 and the source count.
 *
 * Changes relative to the previous revision:
 *   - count == 0 now skips the loop.  The loop is bottom-tested with an
 *     inequality, so a zero count used to process one vertex and then keep
 *     going until the 32-bit counter wrapped around.
 *   - the stack frame is 192 bytes instead of 64.  64 bytes only covers
 *     the sixteen 4-byte register-window save slots of the 32-bit ABI; the
 *     64-bit ABI needs 128 bytes at %sp for the window save area.
 *
 * NOTE(review): viewport_z_enable arrives in %i5, which is overwritten
 * with the proj_vec data pointer before it is ever read, so it has no
 * effect here -- confirm this is intended.
 * NOTE(review): V4F_SIZE is set to 3 while the flag ORed in is VEC_SIZE_4;
 * confirm against the reference C implementation.
 */
	.globl		_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save		%sp, -192, %sp
	call		__pc_tramp		! o7 = address of this call
	 sub		%o7, (. - one_dot_zero - 4), %g1 ! g1 = one_dot_zero
	ld		[%g1 + 0x0], %f4	! f4 = 1.0f
	add		%g1, 0x4, %g1		! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1	! l1 = byte stride of source
	ld		[%i0 + V4F_COUNT], %l3	! l3 = vertex count
	LDPTR		[%i0 + V4F_START], %i0	! i0 = first source vertex
	LDPTR		[%i1 + V4F_START], %i5	! i5 = first destination vertex
	ldub		[%i3], %g2		! g2 = incoming or-mask
	ldub		[%i4], %g3		! g3 = incoming and-mask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2		! g2 = (and-mask << 8) | or-mask

	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]	! proj_vec flags |= VEC_SIZE_4
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]	! proj_vec size = 3
	st		%l3, [%i1 + V4F_COUNT]	! proj_vec count = source count
	clr		%l2			! c = 0
	cmp		%l3, 0
	be		4f			! empty vector: skip the loop
	 clr		%l0			! (delay slot) i = 0

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c (number of clipped vertices)
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * g3:	table index, then clip mask of the current vertex
	 * i0:	current source vertex
	 * i2:	current clipMask entry
	 * i5:	current destination vertex
	 * f4:	1.0f
	 */

1:	ld		[%i0 + 0x0c], %f3	! f3 = w
	ld		[%i0 + 0x0c], %g5	! g5 = raw bits of w
	ld		[%i0 + 0x08], %g4	! g4 = raw bits of z
	fdivs		%f4, %f3, %f8		! f8 = 1.0 / w, issued before mask is known
	addcc		%g5, %g5, %g5		! carry = sign(w), g5 = bits(w) << 1
	addx		%g0, 0x0, %g3		! index = sign(w)
	addcc		%g4, %g4, %g4		! carry = sign(z), g4 = bits(z) << 1
	addx		%g3, %g3, %g3		! index = 2 * index + sign(z)
	subcc		%g5, %g4, %g0		! carry = g5 < g4 (unsigned)
	ld		[%i0 + 0x04], %g4	! g4 = raw bits of y
	addx		%g3, %g3, %g3		! index = 2 * index + carry
	addcc		%g4, %g4, %g4		! carry = sign(y), g4 = bits(y) << 1
	addx		%g3, %g3, %g3		! index = 2 * index + sign(y)
	subcc		%g5, %g4, %g0		! carry = g5 < g4 (unsigned)
	ld		[%i0 + 0x00], %g4	! g4 = raw bits of x
	addx		%g3, %g3, %g3		! index = 2 * index + carry
	addcc		%g4, %g4, %g4		! carry = sign(x), g4 = bits(x) << 1
	addx		%g3, %g3, %g3		! index = 2 * index + sign(x)
	subcc		%g5, %g4, %g0		! carry = g5 < g4 (unsigned)
	addx		%g3, %g3, %g3		! index = 2 * index + carry
	ldub		[%g1 + %g3], %g3	! g3 = clip_table[index]
	cmp		%g3, 0
	be		2f			! mask == 0: vertex is not clipped
	 stb		%g3, [%i2]		! (delay slot, both paths) clipMask[i] = mask

	/* Clipped vertex: update masks and counter, emit (0, 0, 0, 1). */
	sll		%g3, 8, %g4
	add		%l2, 1, %l2		! c++
	st		%g0, [%i5 + 0x00]	! proj.x = 0
	or		%g4, 0xff, %g4		! g4 = (mask << 8) | 0xff
	or		%g2, %g3, %g2		! or-mask |= mask
	st		%g0, [%i5 + 0x04]	! proj.y = 0
	and		%g2, %g4, %g2		! and-mask &= mask, or-mask byte kept
	st		%g0, [%i5 + 0x08]	! proj.z = 0
	b		3f
	 st		%f4, [%i5 + 0x0c]	! (delay slot) proj.w = 1.0

	/* Unclipped vertex: scale x, y, z by 1/w and store 1/w as w. */
2:	ld		[%i0 + 0x00], %f0
	ld		[%i0 + 0x04], %f1
	ld		[%i0 + 0x08], %f2
	fmuls		%f0, %f8, %f0
	st		%f0, [%i5 + 0x00]	! proj.x = x / w
	fmuls		%f1, %f8, %f1
	st		%f1, [%i5 + 0x04]	! proj.y = y / w
	fmuls		%f2, %f8, %f2
	st		%f2, [%i5 + 0x08]	! proj.z = z / w
	st		%f8, [%i5 + 0x0c]	! proj.w = 1 / w

3:	add		%i5, 0x10, %i5		! next destination vertex (16 bytes)
	add		%l0, 1, %l0		! i++
	add		%i2, 1, %i2		! next clipMask entry
	cmp		%l0, %l3
	bne		1b			! loop while i != count
	 add		%i0, %l1, %i0		! (delay slot) next source vertex

4:	stb		%g2, [%i3]		! *orMask = low byte of g2
	srl		%g2, 8, %g3		! g3 = and-mask
	cmp		%l2, %l3
	bl,a		1f			! c < count: some vertex was unclipped
	 clr		%g3			! (annulled unless taken) and-mask = 0
1:	stb		%g3, [%i4]		! *andMask = g3
	ret
	 restore	%i1, 0x0, %o0		! return proj_vec
151af69d88dSmrg
/*
 * GLvector4f *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
 *                                             GLvector4f *proj_vec,
 *                                             GLubyte clipMask[],
 *                                             GLubyte *orMask,
 *                                             GLubyte *andMask,
 *                                             GLboolean viewport_z_enable)
 *
 * Clip-tests every 4-component vertex of clip_vec without producing any
 * projected output: only clipMask[], *orMask and *andMask are written.
 *
 * In:   %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *       %i3 = orMask,   %i4 = andMask,  %i5 = viewport_z_enable
 * Out:  %o0 = proj_vec (after restore)
 *
 * For each vertex i a 7-bit index is built from the sign bits of w, z, y,
 * x and from unsigned compares of (bits << 1) of w against z, y and x;
 * clip_table[index] is stored to clipMask[i].  A non-zero mask is ORed
 * into the running or-mask, ANDed into the running and-mask, and counted
 * in c.  Afterwards *orMask receives the or-mask and *andMask receives
 * the and-mask if every vertex was clipped (c == count), otherwise 0.
 *
 * Changes relative to the previous revision:
 *   - count == 0 now skips the loop.  The loop is bottom-tested with an
 *     inequality, so a zero count used to process one vertex and then keep
 *     going until the 32-bit counter wrapped around.
 *   - the stack frame is 192 bytes instead of 64.  64 bytes only covers
 *     the sixteen 4-byte register-window save slots of the 32-bit ABI; the
 *     64-bit ABI needs 128 bytes at %sp for the window save area.
 *
 * NOTE(review): proj_vec (%i1) is never dereferenced here yet it is the
 * value returned; confirm callers expect proj_vec rather than clip_vec.
 * NOTE(review): viewport_z_enable (%i5) is never read -- confirm intended.
 */
	.globl		_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save		%sp, -192, %sp

	call		__pc_tramp		! o7 = address of this call
	 sub		%o7, (. - one_dot_zero - 4), %g1 ! g1 = one_dot_zero
	add		%g1, 0x4, %g1		! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1	! l1 = byte stride of source
	ld		[%i0 + V4F_COUNT], %l3	! l3 = vertex count
	LDPTR		[%i0 + V4F_START], %i0	! i0 = first source vertex
	ldub		[%i3], %g2		! g2 = incoming or-mask
	ldub		[%i4], %g3		! g3 = incoming and-mask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2		! g2 = (and-mask << 8) | or-mask

	clr		%l2			! c = 0
	cmp		%l3, 0
	be		3f			! empty vector: skip the loop
	 clr		%l0			! (delay slot) i = 0

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c (number of clipped vertices)
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * g3:	table index, then clip mask of the current vertex
	 * i0:	current source vertex
	 * i2:	current clipMask entry
	 */

1:	ld		[%i0 + 0x0c], %g5	! g5 = raw bits of w
	ld		[%i0 + 0x08], %g4	! g4 = raw bits of z
	addcc		%g5, %g5, %g5		! carry = sign(w), g5 = bits(w) << 1
	addx		%g0, 0x0, %g3		! index = sign(w)
	addcc		%g4, %g4, %g4		! carry = sign(z), g4 = bits(z) << 1
	addx		%g3, %g3, %g3		! index = 2 * index + sign(z)
	subcc		%g5, %g4, %g0		! carry = g5 < g4 (unsigned)
	ld		[%i0 + 0x04], %g4	! g4 = raw bits of y
	addx		%g3, %g3, %g3		! index = 2 * index + carry
	addcc		%g4, %g4, %g4		! carry = sign(y), g4 = bits(y) << 1
	addx		%g3, %g3, %g3		! index = 2 * index + sign(y)
	subcc		%g5, %g4, %g0		! carry = g5 < g4 (unsigned)
	ld		[%i0 + 0x00], %g4	! g4 = raw bits of x
	addx		%g3, %g3, %g3		! index = 2 * index + carry
	addcc		%g4, %g4, %g4		! carry = sign(x), g4 = bits(x) << 1
	addx		%g3, %g3, %g3		! index = 2 * index + sign(x)
	subcc		%g5, %g4, %g0		! carry = g5 < g4 (unsigned)
	addx		%g3, %g3, %g3		! index = 2 * index + carry
	ldub		[%g1 + %g3], %g3	! g3 = clip_table[index]
	cmp		%g3, 0
	be		2f			! mask == 0: nothing to accumulate
	 stb		%g3, [%i2]		! (delay slot, both paths) clipMask[i] = mask

	/* Clipped vertex: fold the mask into the running or/and masks. */
	sll		%g3, 8, %g4
	add		%l2, 1, %l2		! c++
	or		%g4, 0xff, %g4		! g4 = (mask << 8) | 0xff
	or		%g2, %g3, %g2		! or-mask |= mask
	and		%g2, %g4, %g2		! and-mask &= mask, or-mask byte kept

2:	add		%l0, 1, %l0		! i++
	add		%i2, 1, %i2		! next clipMask entry
	cmp		%l0, %l3
	bne		1b			! loop while i != count
	 add		%i0, %l1, %i0		! (delay slot) next source vertex

3:	stb		%g2, [%i3]		! *orMask = low byte of g2
	srl		%g2, 8, %g3		! g3 = and-mask
	cmp		%l2, %l3
	bl,a		1f			! c < count: some vertex was unclipped
	 clr		%g3			! (annulled unless taken) and-mask = 0
1:	stb		%g3, [%i4]		! *andMask = g3
	ret
	 restore	%i1, 0x0, %o0		! return proj_vec
221