/*
 * Clip testing in SPARC assembly
 *
 * Syntax : GNU as for SPARC, run through the C preprocessor.
 * Target : 32-bit SPARC, or 64-bit SPARC when __arch64__ is defined.
 *
 * Exports:
 *   _mesa_sparc_cliptest_points4     clip test + perspective divide
 *   _mesa_sparc_cliptest_points4_np  clip test only (no projection)
 */

/*
 * Byte offsets of the GLvector4f fields used below, and the load
 * instruction that fetches a pointer-sized field.
 *
 * FRAME_SIZE is the stack frame allocated by the save instruction.
 * A frame must at least hold the register-window save area (16
 * registers).  That is 16 * 4 = 64 bytes on 32-bit SPARC, but
 * 16 * 8 = 128 bytes on 64-bit SPARC, where the ABI minimum frame
 * is 176 bytes; a 64-byte frame there would let a window flush
 * overwrite the save area of the caller.
 */
#if __arch64__
#define LDPTR           ldx
#define V4F_DATA        0x00
#define V4F_START       0x08
#define V4F_COUNT       0x10
#define V4F_STRIDE      0x14
#define V4F_SIZE        0x18
#define V4F_FLAGS       0x1c
#define FRAME_SIZE      176
#else
#define LDPTR           ld
#define V4F_DATA        0x00
#define V4F_START       0x04
#define V4F_COUNT       0x08
#define V4F_STRIDE      0x0c
#define V4F_SIZE        0x10
#define V4F_FLAGS       0x14
#define FRAME_SIZE      64
#endif

/* Values for the V4F_FLAGS word; only VEC_SIZE_4 is used in this file. */
#define VEC_SIZE_1      1
#define VEC_SIZE_2      3
#define VEC_SIZE_3      7
#define VEC_SIZE_4      15

        .register %g2, #scratch
        .register %g3, #scratch

        .text
        .align  64

/*
 * Constants live in .text directly in front of the code so that
 * they can be addressed PC-relative (see the call/sub sequence at
 * the top of each function).  clip_table must stay exactly 4 bytes
 * after one_dot_zero: the functions compute its address that way.
 */
one_dot_zero:
        .word   0x3f800000      /* 1.0f */

        /* This trick is shamelessly stolen from the x86
         * Mesa asm.  Very clever, and we can do it too
         * since we have the necessary add with carry
         * instructions on Sparc.
         *
         * The loops below build a 7-bit index out of carry bits,
         * most significant bit first:
         *
         *   bit 6: sign bit of w
         *   bit 5: sign bit of z
         *   bit 4: borrow of (w << 1) - (z << 1)
         *   bit 3: sign bit of y
         *   bit 2: borrow of (w << 1) - (y << 1)
         *   bit 1: sign bit of x
         *   bit 0: borrow of (w << 1) - (x << 1)
         *
         * The operands are the raw 32-bit patterns of the floats,
         * so each borrow is an unsigned compare of the patterns
         * with the sign bit shifted out.  The table maps the index
         * to the clip mask byte for that vertex (128 entries).
         */
clip_table:
        .byte    0,  1,  0,  2,  4,  5,  4,  6
        .byte    0,  1,  0,  2,  8,  9,  8, 10
        .byte   32, 33, 32, 34, 36, 37, 36, 38
        .byte   32, 33, 32, 34, 40, 41, 40, 42
        .byte    0,  1,  0,  2,  4,  5,  4,  6
        .byte    0,  1,  0,  2,  8,  9,  8, 10
        .byte   16, 17, 16, 18, 20, 21, 20, 22
        .byte   16, 17, 16, 18, 24, 25, 24, 26
        .byte   63, 61, 63, 62, 55, 53, 55, 54
        .byte   63, 61, 63, 62, 59, 57, 59, 58
        .byte   47, 45, 47, 46, 39, 37, 39, 38
        .byte   47, 45, 47, 46, 43, 41, 43, 42
        .byte   63, 61, 63, 62, 55, 53, 55, 54
        .byte   63, 61, 63, 62, 59, 57, 59, 58
        .byte   31, 29, 31, 30, 23, 21, 23, 22
        .byte   31, 29, 31, 30, 27, 25, 27, 26

/* GLvector4f *clip_vec, GLvector4f *proj_vec,
   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
   GLboolean viewport_z_enable */

/*
 * Incoming arguments of both entry points, after the save:
 *   %i0 = clip_vec   %i1 = proj_vec   %i2 = clipMask
 *   %i3 = orMask     %i4 = andMask    %i5 = viewport_z_enable
 *
 * viewport_z_enable is never read: points4 reuses %i5 as the
 * output pointer and points4_np does not touch it.
 */

        .align  64

/*
 * Leaf helper that returns immediately.  Calling it only serves to
 * put the address of the call instruction into %o7, which the
 * caller uses to locate one_dot_zero position-independently.
 */
__pc_tramp:
        retl
         nop

/*
 * _mesa_sparc_cliptest_points4
 *
 * For each of clip_vec count vertices (4 floats each, stride
 * bytes apart, starting at clip_vec start):
 *   - look up the clip mask byte and store it to clipMask[i];
 *   - mask == 0: write x/w, y/w, z/w and 1/w to the output;
 *   - mask != 0: write 0, 0, 0, 1.0f to the output, fold the mask
 *     into the running or/and masks and count the vertex.
 * The output is written 16 bytes per vertex starting at proj_vec
 * start.  proj_vec flags gets VEC_SIZE_4 OR-ed in, proj_vec size
 * is set to 4 and proj_vec count to the input count.
 *
 * On exit *orMask is the incoming value OR-ed with every non-zero
 * mask; *andMask is 0 if any vertex had a zero mask, otherwise the
 * incoming value AND-ed with every mask.
 *
 * Returns proj_vec in %o0.
 * Clobbers %g1-%g5, %f0-%f4, %f8 and the integer condition codes.
 */
        .globl  _mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
        save    %sp, -FRAME_SIZE, %sp
        call    __pc_tramp
         sub    %o7, (. - one_dot_zero - 4), %g1    ! delay slot: %g1 = address of one_dot_zero
        ld      [%g1 + 0x0], %f4                    ! %f4 = 1.0f
        add     %g1, 0x4, %g1                       ! %g1 = address of clip_table

        ld      [%i0 + V4F_STRIDE], %l1
        ld      [%i0 + V4F_COUNT], %l3
        LDPTR   [%i0 + V4F_START], %i0
        LDPTR   [%i1 + V4F_START], %i5
        ldub    [%i3], %g2
        ldub    [%i4], %g3
        sll     %g3, 8, %g3
        or      %g2, %g3, %g2

        ld      [%i1 + V4F_FLAGS], %g3
        or      %g3, VEC_SIZE_4, %g3
        st      %g3, [%i1 + V4F_FLAGS]
        /* All four components are written below, so the size is 4
         * (it used to be stored as 3, disagreeing with VEC_SIZE_4). */
        mov     4, %g3
        st      %g3, [%i1 + V4F_SIZE]
        st      %l3, [%i1 + V4F_COUNT]
        clr     %l2
        clr     %l0

        /* The loop is bottom-tested and exits on i == count, so it
         * must not be entered with count == 0.  In that case go
         * straight to the mask write-back, which then stores both
         * masks back unchanged. */
        cmp     %l3, 0
        be      4f
         nop

        /* l0:  i
         * l3:  count
         * l1:  stride
         * l2:  c
         * g2:  (tmpAndMask << 8) | tmpOrMask
         * g1:  clip_table
         * i0:  from[stride][i]
         * i2:  clipMask
         * i5:  vProj[4][i]
         */

        /* w is loaded twice: into %f3 for the divide and into %g5
         * as raw bits for the carry-chain index.  The divide is
         * started early so that it overlaps the integer work. */
1:      ld      [%i0 + 0x0c], %f3       ! LSU   Group
        ld      [%i0 + 0x0c], %g5       ! LSU   Group
        ld      [%i0 + 0x08], %g4       ! LSU   Group
        fdivs   %f4, %f3, %f8           ! FGM           %f8 = 1.0 / w
        addcc   %g5, %g5, %g5           ! IEU1  Group   carry = sign of w
        addx    %g0, 0x0, %g3           ! IEU1  Group   index = carry
        addcc   %g4, %g4, %g4           ! IEU1  Group   carry = sign of z
        addx    %g3, %g3, %g3           ! IEU1  Group   index = 2 * index + carry
        subcc   %g5, %g4, %g0           ! IEU1  Group   carry = borrow of w - z
        ld      [%i0 + 0x04], %g4       ! LSU   Group
        addx    %g3, %g3, %g3           ! IEU1  Group
        addcc   %g4, %g4, %g4           ! IEU1  Group   carry = sign of y
        addx    %g3, %g3, %g3           ! IEU1  Group
        subcc   %g5, %g4, %g0           ! IEU1  Group   carry = borrow of w - y
        ld      [%i0 + 0x00], %g4       ! LSU   Group
        addx    %g3, %g3, %g3           ! IEU1  Group
        addcc   %g4, %g4, %g4           ! IEU1  Group   carry = sign of x
        addx    %g3, %g3, %g3           ! IEU1  Group
        subcc   %g5, %g4, %g0           ! IEU1  Group   carry = borrow of w - x
        addx    %g3, %g3, %g3           ! IEU1  Group
        ldub    [%g1 + %g3], %g3        ! LSU   Group   mask = clip_table[index]
        cmp     %g3, 0                  ! IEU1  Group, stall
        be      2f                      ! CTI
         stb    %g3, [%i2]              ! LSU           delay slot, always runs: clipMask[i] = mask

        /* mask != 0: count it, fold it into both running masks and
         * emit (0, 0, 0, 1).  %g4 = (mask << 8) | 0xff, so the and
         * only narrows the tmpAndMask byte and keeps tmpOrMask. */
        sll     %g3, 8, %g4             ! IEU1  Group
        add     %l2, 1, %l2             ! IEU0
        st      %g0, [%i5 + 0x00]       ! LSU
        or      %g4, 0xff, %g4          ! IEU0  Group
        or      %g2, %g3, %g2           ! IEU1
        st      %g0, [%i5 + 0x04]       ! LSU
        and     %g2, %g4, %g2           ! IEU0  Group
        st      %g0, [%i5 + 0x08]       ! LSU
        b       3f                      ! CTI
         st     %f4, [%i5 + 0x0c]       ! LSU   Group   delay slot: w = 1.0f

        /* mask == 0: emit (x/w, y/w, z/w, 1/w). */
2:      ld      [%i0 + 0x00], %f0       ! LSU   Group
        ld      [%i0 + 0x04], %f1       ! LSU   Group
        ld      [%i0 + 0x08], %f2       ! LSU   Group
        fmuls   %f0, %f8, %f0           ! FGM
        st      %f0, [%i5 + 0x00]       ! LSU   Group
        fmuls   %f1, %f8, %f1           ! FGM
        st      %f1, [%i5 + 0x04]       ! LSU   Group
        fmuls   %f2, %f8, %f2           ! FGM
        st      %f2, [%i5 + 0x08]       ! LSU   Group
        st      %f8, [%i5 + 0x0c]       ! LSU   Group

3:      add     %i5, 0x10, %i5          ! IEU1
        add     %l0, 1, %l0             ! IEU0  Group
        add     %i2, 1, %i2             ! IEU0  Group
        cmp     %l0, %l3                ! IEU1  Group
        bne     1b                      ! CTI
         add    %i0, %l1, %i0           ! IEU0  Group   delay slot: next input vertex

        /* *orMask = tmpOrMask; *andMask = (c < count) ? 0 : tmpAndMask.
         * The annulled delay slot clears %g3 only when the branch
         * is taken, i.e. only when c < count. */
4:      stb     %g2, [%i3]              ! LSU
        srl     %g2, 8, %g3             ! IEU0  Group
        cmp     %l2, %l3                ! IEU1  Group
        bl,a    1f                      ! CTI
         clr    %g3                     ! IEU0
1:      stb     %g3, [%i4]              ! LSU   Group
        ret                             ! CTI   Group
         restore %i1, 0x0, %o0          ! return proj_vec

/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Same clip test and mask accumulation as above, but nothing is
 * projected: proj_vec is neither read nor written, and only
 * clipMask[], *orMask and *andMask are updated.
 *
 * Returns the incoming proj_vec argument (%i1) in %o0.
 * NOTE(review): confirm callers expect that value here.
 * Clobbers %g1-%g5 and the integer condition codes.
 */
        .globl  _mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
        save    %sp, -FRAME_SIZE, %sp

        call    __pc_tramp
         sub    %o7, (. - one_dot_zero - 4), %g1    ! delay slot: %g1 = address of one_dot_zero
        add     %g1, 0x4, %g1                       ! %g1 = address of clip_table

        ld      [%i0 + V4F_STRIDE], %l1
        ld      [%i0 + V4F_COUNT], %l3
        LDPTR   [%i0 + V4F_START], %i0
        ldub    [%i3], %g2
        ldub    [%i4], %g3
        sll     %g3, 8, %g3
        or      %g2, %g3, %g2

        clr     %l2
        clr     %l0

        /* Bottom-tested loop: skip it entirely when count == 0 so
         * that both masks are stored back unchanged. */
        cmp     %l3, 0
        be      4f
         nop

        /* l0:  i
         * l3:  count
         * l1:  stride
         * l2:  c
         * g2:  (tmpAndMask << 8) | tmpOrMask
         * g1:  clip_table
         * i0:  from[stride][i]
         * i2:  clipMask
         */

1:      ld      [%i0 + 0x0c], %g5       ! LSU   Group
        ld      [%i0 + 0x08], %g4       ! LSU   Group
        addcc   %g5, %g5, %g5           ! IEU1  Group   carry = sign of w
        addx    %g0, 0x0, %g3           ! IEU1  Group   index = carry
        addcc   %g4, %g4, %g4           ! IEU1  Group   carry = sign of z
        addx    %g3, %g3, %g3           ! IEU1  Group   index = 2 * index + carry
        subcc   %g5, %g4, %g0           ! IEU1  Group   carry = borrow of w - z
        ld      [%i0 + 0x04], %g4       ! LSU   Group
        addx    %g3, %g3, %g3           ! IEU1  Group
        addcc   %g4, %g4, %g4           ! IEU1  Group   carry = sign of y
        addx    %g3, %g3, %g3           ! IEU1  Group
        subcc   %g5, %g4, %g0           ! IEU1  Group   carry = borrow of w - y
        ld      [%i0 + 0x00], %g4       ! LSU   Group
        addx    %g3, %g3, %g3           ! IEU1  Group
        addcc   %g4, %g4, %g4           ! IEU1  Group   carry = sign of x
        addx    %g3, %g3, %g3           ! IEU1  Group
        subcc   %g5, %g4, %g0           ! IEU1  Group   carry = borrow of w - x
        addx    %g3, %g3, %g3           ! IEU1  Group
        ldub    [%g1 + %g3], %g3        ! LSU   Group   mask = clip_table[index]
        cmp     %g3, 0                  ! IEU1  Group, stall
        be      2f                      ! CTI
         stb    %g3, [%i2]              ! LSU           delay slot, always runs: clipMask[i] = mask

        /* mask != 0: count it and fold it into both running masks. */
        sll     %g3, 8, %g4             ! IEU1  Group
        add     %l2, 1, %l2             ! IEU0
        or      %g4, 0xff, %g4          ! IEU0  Group
        or      %g2, %g3, %g2           ! IEU1
        and     %g2, %g4, %g2           ! IEU0  Group

2:      add     %l0, 1, %l0             ! IEU0  Group
        add     %i2, 1, %i2             ! IEU0  Group
        cmp     %l0, %l3                ! IEU1  Group
        bne     1b                      ! CTI
         add    %i0, %l1, %i0           ! IEU0  Group   delay slot: next input vertex

        /* *orMask = tmpOrMask; *andMask = (c < count) ? 0 : tmpAndMask. */
4:      stb     %g2, [%i3]              ! LSU
        srl     %g2, 8, %g3             ! IEU0  Group
        cmp     %l2, %l3                ! IEU1  Group
        bl,a    1f                      ! CTI
         clr    %g3                     ! IEU0          annulled unless c < count
1:      stb     %g3, [%i4]              ! LSU   Group
        ret                             ! CTI   Group
         restore %i1, 0x0, %o0