/* SPARC normal-vector transform routines (GNU as syntax, `!` comments).
 *
 * The LDPTR / LDMATRIX_* macros, the M0..M10 register aliases and the
 * MAT_INV / V4F_START / V4F_COUNT / V4F_STRIDE offsets come from
 * "sparc_matrix.h".
 *
 * All entry points are leaf routines (no register window) and share the
 * same argument layout:
 *   %o0 = mat   %o1 = scale (GLfloat bits)   %o2 = in vector
 *   %o3 = lengths (may be NULL)              %o4 = dest vector
 * Normals are read/written as 3 floats at offsets 0x00/0x04/0x08; the
 * output pointer advances by 0x10 per element.
 */
#include "sparc_matrix.h"

	.register	%g2, #scratch
	.register	%g3, #scratch

	.text

	/* Scratch slot just above the register-window save area of the
	 * current frame: 2047 is the V9 stack bias, 8*16 (resp. 4*16 on
	 * 32-bit) skips the 16 window-register save slots.
	 */
#ifdef __arch64__
#define STACK_VAR_OFF	(2047 + (8 * 16))
#else
#define STACK_VAR_OFF	(4 * 16)
#endif

	/* Newton-Raphson approximation turns out to be slower
	 * (and less accurate) than direct fsqrts/fdivs.
	 */
#define ONE_DOT_ZERO	0x3f800000	/* IEEE-754 single-precision 1.0f */

	/* Transform normals by the 3x3 of mat->inv and normalize.
	 * lengths == NULL: normalize each result with 1/sqrt(len).
	 * lengths != NULL: fold 'scale' into the matrix once, then
	 * multiply each result by the precomputed lengths[i].
	 */
	.globl	_mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Bounce 1.0f and 'scale' through the stack to move them from
	 * integer argument registers into FP registers.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f			! count < 1 --> nothing to do
	 cmp	%o3, 0			! (delay slot) test 'lengths'
	bne	4f			! lengths != NULL --> loop at 5:
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	fmuls	%f1, M1, %f4		! FGM Group
	fmuls	%f0, M4, %f5		! FGM Group
	fmuls	%f1, M5, %f6		! FGM Group
	fmuls	%f0, M8, %f7		! FGM Group f3 available
	fmuls	%f1, M9, %f8		! FGM Group f4 available
	fadds	%f3, %f4, %f3		! FGA
	fmuls	%f2, M2, %f10		! FGM Group f5 available
	fmuls	%f2, M6, %f0		! FGM Group f6 available (f0 reused as scratch)
	fadds	%f5, %f6, %f5		! FGA
	fmuls	%f2, M10, %f4		! FGM Group f7 available
	fadds	%f7, %f8, %f7		! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3		! FGA Group f10 available
	fadds	%f5, %f0, %f5		! FGA Group stall f0,f5 available
	fadds	%f7, %f4, %f7		! FGA Group stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6		! FGM Group f3 available
	fmuls	%f5, %f5, %f8		! FGM Group f5 available
	fmuls	%f7, %f7, %f10		! FGM Group f7 available
	fadds	%f6, %f8, %f6		! FGA Group 2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6		! FGA Group 4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6		! FDIV 20 cycles
	fdivs	%f12, %f6, %f6		! FDIV 14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL: pre-scale the matrix rows by 'scale' once */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	fmuls	%f1, M1, %f4		! FGM Group
	fmuls	%f0, M4, %f5		! FGM Group
	fmuls	%f1, M5, %f6		! FGM Group
	fmuls	%f0, M8, %f7		! FGM Group f3 available
	fmuls	%f1, M9, %f8		! FGM Group f4 available
	fadds	%f3, %f4, %f3		! FGA
	fmuls	%f2, M2, %f10		! FGM Group f5 available
	fmuls	%f2, M6, %f0		! FGM Group f6 available
	fadds	%f5, %f6, %f5		! FGA
	fmuls	%f2, M10, %f4		! FGM Group f7 available
	fadds	%f7, %f8, %f7		! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3		! FGA Group f10 available
	ld	[%o3], %f13		! LSU  f13 = lengths[i]
	fadds	%f5, %f0, %f5		! FGA Group stall f0,f5 available
	add	%o3, 4, %o3		! IEU0 lengths++
	fadds	%f7, %f4, %f7		! FGA Group stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * len

	cmp	%o4, %g1		! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop

	/* Same as above for a pure scale/translate matrix: only the
	 * diagonal m0/m5/m10 of mat->inv is used.
	 */
	.globl	_mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	 cmp	%o3, 0			! (delay slot) test 'lengths'
	bne	4f
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	fmuls	%f1, M5, %f5		! FGM Group
	fmuls	%f2, M10, %f7		! FGM Group

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6		! FGM Group stall, f3 available
	fmuls	%f5, %f5, %f8		! FGM Group f5 available
	fmuls	%f7, %f7, %f10		! FGM Group f7 available
	fadds	%f6, %f8, %f6		! FGA Group 2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6		! FGA Group 4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6		! FDIV 20 cycles
	fdivs	%f12, %f6, %f6		! FDIV 14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL: pre-scale the diagonal by 'scale' once */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	ld	[%o3], %f13		! LSU  f13 = lengths[i]
	fmuls	%f1, M5, %f5		! FGM Group
	add	%o3, 4, %o3		! IEU0 lengths++
	fmuls	%f2, M10, %f7		! FGM Group

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * len

	cmp	%o4, %g1		! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop

	/* Diagonal-only transform, each component scaled by 'scale'
	 * (folded into m0/m5/m10 before the loop); no normalization.
	 */
	.globl	_mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

	fmuls	M0, %f15, M0		! fold 'scale' into the diagonal
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	st	%f3, [%g3 + 0x00]	! LSU
	fmuls	%f1, M5, %f5		! FGM Group
	st	%f5, [%g3 + 0x04]	! LSU
	fmuls	%f2, M10, %f7		! FGM Group
	st	%f7, [%g3 + 0x08]	! LSU

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop

	/* Full 3x3 transform with 'scale' folded into all nine matrix
	 * entries before the loop; no normalization.
	 */
	.globl	_mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

	fmuls	M0, %f15, M0		! fold 'scale' into the matrix
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	fmuls	%f1, M1, %f4		! FGM Group
	fmuls	%f0, M4, %f5		! FGM Group
	fmuls	%f1, M5, %f6		! FGM Group
	fmuls	%f0, M8, %f7		! FGM Group f3 available
	fmuls	%f1, M9, %f8		! FGM Group f4 available
	fadds	%f3, %f4, %f3		! FGA
	fmuls	%f2, M2, %f10		! FGM Group f5 available
	fmuls	%f2, M6, %f0		! FGM Group f6 available
	fadds	%f5, %f6, %f5		! FGA
	fmuls	%f2, M10, %f4		! FGM Group f7 available
	fadds	%f7, %f8, %f7		! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3		! FGA Group f10 available
	st	%f3, [%g3 + 0x00]	! LSU
	fadds	%f5, %f0, %f5		! FGA Group stall f0,f5 available
	st	%f5, [%g3 + 0x04]	! LSU
	fadds	%f7, %f4, %f7		! FGA Group stall f4,f7 available
	st	%f7, [%g3 + 0x08]	! LSU

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop

	/* Diagonal-only transform, no scale, no normalization. */
	.globl	_mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	st	%f3, [%g3 + 0x00]	! LSU
	fmuls	%f1, M5, %f5		! FGM Group
	st	%f5, [%g3 + 0x04]	! LSU
	fmuls	%f2, M10, %f7		! FGM Group
	st	%f7, [%g3 + 0x08]	! LSU

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop

	/* Full 3x3 transform, no scale, no normalization. */
	.globl	_mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0	! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0	! ux = from[0]
	ld	[%o5 + 0x04], %f1	! uy = from[1]
	ld	[%o5 + 0x08], %f2	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3		! FGM Group
	fmuls	%f1, M1, %f4		! FGM Group
	fmuls	%f0, M4, %f5		! FGM Group
	fmuls	%f1, M5, %f6		! FGM Group
	fmuls	%f0, M8, %f7		! FGM Group f3 available
	fmuls	%f1, M9, %f8		! FGM Group f4 available
	fadds	%f3, %f4, %f3		! FGA
	fmuls	%f2, M2, %f10		! FGM Group f5 available
	fmuls	%f2, M6, %f0		! FGM Group f6 available
	fadds	%f5, %f6, %f5		! FGA
	fmuls	%f2, M10, %f4		! FGM Group f7 available
	fadds	%f7, %f8, %f7		! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3		! FGA Group f10 available
	st	%f3, [%g3 + 0x00]	! LSU
	fadds	%f5, %f0, %f5		! FGA Group stall f0,f5 available
	st	%f5, [%g3 + 0x04]	! LSU
	fadds	%f7, %f4, %f7		! FGA Group stall f4,f7 available
	st	%f7, [%g3 + 0x08]	! LSU

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop

	/* No matrix: normalize input normals in place into dest
	 * (lengths == NULL), or multiply them by lengths[i].
	 */
	.globl	_mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	 cmp	%o3, 0			! (delay slot) test 'lengths'
	bne	4f
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f3	! ux = from[0]
	ld	[%o5 + 0x04], %f5	! uy = from[1]
	ld	[%o5 + 0x08], %f7	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6		! FGM Group f3 available
	fmuls	%f5, %f5, %f8		! FGM Group f5 available
	fmuls	%f7, %f7, %f10		! FGM Group f7 available
	fadds	%f6, %f8, %f6		! FGA Group 2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6		! FGA Group 4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6		! FDIV 20 cycles
	fdivs	%f12, %f6, %f6		! FDIV 14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */

5:
	ld	[%o5 + 0x00], %f3	! ux = from[0]
	ld	[%o5 + 0x04], %f5	! uy = from[1]
	ld	[%o5 + 0x08], %f7	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	ld	[%o3], %f13		! LSU  f13 = lengths[i]
	add	%o3, 4, %o3		! IEU0 lengths++

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * len

	cmp	%o4, %g1		! continue if (i < count)
	bl	5b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop

	/* No matrix: multiply each input normal by 'scale'. */
	.globl	_mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sethi	%hi(ONE_DOT_ZERO), %g2	! NOTE(review): %g2 appears unused here (leftover?)
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5	! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1	! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2	! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3	! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	 clr	%o4			! (delay slot) 'i' for STRIDE_LOOP

1:
	ld	[%o5 + 0x00], %f3	! ux = from[0]
	ld	[%o5 + 0x04], %f5	! uy = from[1]
	ld	[%o5 + 0x08], %f7	! uz = from[2]
	add	%o5, %g2, %o5		! STRIDE_F(from, stride)
	add	%o4, 1, %o4		! i++

	/* f3=tx, f5=ty, f7=tz */

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]	! out[i][0] = tx * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]	! out[i][1] = ty * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]	! out[i][2] = tz * scale

	cmp	%o4, %g1		! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3		! (delay slot) advance out vector pointer

7:	retl
	 nop