/*
 * Normal-vector transform routines for SPARC.
 *
 * Syntax: SPARC assembler (operands are "op src, src, dst"), run through
 * the C preprocessor.
 *
 * Every entry point receives the same five arguments:
 *      %o0 = mat       %o1 = scale (float bit pattern in an integer reg)
 *      %o2 = in        %o3 = lengths       %o4 = dest
 * All of them are leaf routines (no register window is allocated, they
 * return with retl) and use %g1-%g3, %o0, %o3-%o5 and the FP registers
 * as scratch.
 *
 * LDPTR, LDMATRIX_*, M0..M10, MATRIX_INV and the V4F_* offsets are
 * supplied by sparc_matrix.h.
 */

#include "sparc_matrix.h"

        .register %g2, #scratch
        .register %g3, #scratch

        .text

/* Offset from %sp of the first stack slot used as a temporary below. */
#ifdef __arch64__
#define STACK_VAR_OFF   (2047 + (8 * 16))
#else
#define STACK_VAR_OFF   (4 * 16)
#endif

/*
 * Branch-if-not-zero for a pointer that has just been compared with 0.
 * In 64-bit builds pointers are 64 bits wide, so the branch has to test
 * %xcc; a plain bne only looks at the 32-bit condition codes and would
 * treat a non-NULL pointer whose low 32 bits are zero as NULL.
 */
#ifdef __arch64__
#define BNE_PTR         bne %xcc,
#else
#define BNE_PTR         bne
#endif

        /* Newton-Raphson approximation turns out to be slower
         * (and less accurate) than direct fsqrts/fdivs.
         */
#define ONE_DOT_ZERO    0x3f800000

/*
 * _mesa_sparc_transform_normalize_normals
 *
 * Multiplies each input normal by the 3x3 part of mat->inv (elements
 * 0,1,2,4,5,6,8,9,10).  When lengths is NULL the result is normalized
 * with 1/sqrt(len); otherwise the matrix is pre-multiplied by scale and
 * each result is multiplied by lengths[i].
 */
        .globl  _mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */

        /* Bounce 1.0f and scale through the stack to reach the FPU. */
        sethi   %hi(ONE_DOT_ZERO), %g2
        sub     %sp, 16, %sp
        st      %g2, [%sp + STACK_VAR_OFF+0x0]
        st      %o1, [%sp + STACK_VAR_OFF+0x4]
        ld      [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
        ld      [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
        add     %sp, 16, %sp

        add     %o0, MATRIX_INV, %o0            ! o0 = mat->inv
        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         cmp    %o3, 0                          ! (delay slot) lengths == NULL ?
        BNE_PTR 4f
         clr    %o4                             ! (delay slot) i = 0

1:      /* LENGTHS == NULL */
        ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
         * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
         * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
         */
        fmuls   %f0, M0, %f3                    ! FGM   Group
        fmuls   %f1, M1, %f4                    ! FGM   Group
        fmuls   %f0, M4, %f5                    ! FGM   Group
        fmuls   %f1, M5, %f6                    ! FGM   Group
        fmuls   %f0, M8, %f7                    ! FGM   Group   f3 available
        fmuls   %f1, M9, %f8                    ! FGM   Group   f4 available
        fadds   %f3, %f4, %f3                   ! FGA
        fmuls   %f2, M2, %f10                   ! FGM   Group   f5 available
        fmuls   %f2, M6, %f0                    ! FGM   Group   f6 available
        fadds   %f5, %f6, %f5                   ! FGA
        fmuls   %f2, M10, %f4                   ! FGM   Group   f7 available
        fadds   %f7, %f8, %f7                   ! FGA   Group   f8,f3 available
        fadds   %f3, %f10, %f3                  ! FGA   Group   f10 available
        fadds   %f5, %f0, %f5                   ! FGA   Group   stall f0,f5 available
        fadds   %f7, %f4, %f7                   ! FGA   Group   stall f4,f7 available

        /* f3=tx, f5=ty, f7=tz */

        /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
        fmuls   %f3, %f3, %f6                   ! FGM   Group   f3 available
        fmuls   %f5, %f5, %f8                   ! FGM   Group   f5 available
        fmuls   %f7, %f7, %f10                  ! FGM   Group   f7 available
        fadds   %f6, %f8, %f6                   ! FGA   Group   2cyc stall f6,f8 available
        fadds   %f6, %f10, %f6                  ! FGA   Group   4cyc stall f6,f10 available

        /* scale (f6) = 1.0 / sqrt(len) */
        fsqrts  %f6, %f6                        ! FDIV  20 cycles
        fdivs   %f12, %f6, %f6                  ! FDIV  14 cycles

        fmuls   %f3, %f6, %f3
        st      %f3, [%g3 + 0x00]               ! out[i][0] = tx * scale
        fmuls   %f5, %f6, %f5
        st      %f5, [%g3 + 0x04]               ! out[i][1] = ty * scale
        fmuls   %f7, %f6, %f7
        st      %f7, [%g3 + 0x08]               ! out[i][2] = tz * scale

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

        ba      7f
         nop

4:      /* LENGTHS != NULL: fold scale into the matrix once, up front */
        fmuls   M0, %f15, M0
        fmuls   M1, %f15, M1
        fmuls   M2, %f15, M2
        fmuls   M4, %f15, M4
        fmuls   M5, %f15, M5
        fmuls   M6, %f15, M6
        fmuls   M8, %f15, M8
        fmuls   M9, %f15, M9
        fmuls   M10, %f15, M10

5:
        ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
         * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
         * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
         */
        fmuls   %f0, M0, %f3                    ! FGM   Group
        fmuls   %f1, M1, %f4                    ! FGM   Group
        fmuls   %f0, M4, %f5                    ! FGM   Group
        fmuls   %f1, M5, %f6                    ! FGM   Group
        fmuls   %f0, M8, %f7                    ! FGM   Group   f3 available
        fmuls   %f1, M9, %f8                    ! FGM   Group   f4 available
        fadds   %f3, %f4, %f3                   ! FGA
        fmuls   %f2, M2, %f10                   ! FGM   Group   f5 available
        fmuls   %f2, M6, %f0                    ! FGM   Group   f6 available
        fadds   %f5, %f6, %f5                   ! FGA
        fmuls   %f2, M10, %f4                   ! FGM   Group   f7 available
        fadds   %f7, %f8, %f7                   ! FGA   Group   f8,f3 available
        fadds   %f3, %f10, %f3                  ! FGA   Group   f10 available
        ld      [%o3], %f13                     ! LSU
        fadds   %f5, %f0, %f5                   ! FGA   Group   stall f0,f5 available
        add     %o3, 4, %o3                     ! IEU0
        fadds   %f7, %f4, %f7                   ! FGA   Group   stall f4,f7 available

        /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

        fmuls   %f3, %f13, %f3
        st      %f3, [%g3 + 0x00]               ! out[i][0] = tx * len
        fmuls   %f5, %f13, %f5
        st      %f5, [%g3 + 0x04]               ! out[i][1] = ty * len
        fmuls   %f7, %f13, %f7
        st      %f7, [%g3 + 0x08]               ! out[i][2] = tz * len

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      5b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop

/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Same as _mesa_sparc_transform_normalize_normals, but only the diagonal
 * elements 0, 5 and 10 of mat->inv are used.
 */
        .globl  _mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */

        /* Bounce 1.0f and scale through the stack to reach the FPU. */
        sethi   %hi(ONE_DOT_ZERO), %g2
        sub     %sp, 16, %sp
        st      %g2, [%sp + STACK_VAR_OFF+0x0]
        st      %o1, [%sp + STACK_VAR_OFF+0x4]
        ld      [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
        ld      [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
        add     %sp, 16, %sp

        add     %o0, MATRIX_INV, %o0            ! o0 = mat->inv
        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        LDMATRIX_0_5_10(%o0)

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         cmp    %o3, 0                          ! (delay slot) lengths == NULL ?
        BNE_PTR 4f
         clr    %o4                             ! (delay slot) i = 0

1:      /* LENGTHS == NULL */
        ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* tx (f3) = (ux * m0)
         * ty (f5) = (uy * m5)
         * tz (f7) = (uz * m10)
         */
        fmuls   %f0, M0, %f3                    ! FGM   Group
        fmuls   %f1, M5, %f5                    ! FGM   Group
        fmuls   %f2, M10, %f7                   ! FGM   Group

        /* f3=tx, f5=ty, f7=tz */

        /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
        fmuls   %f3, %f3, %f6                   ! FGM   Group   stall, f3 available
        fmuls   %f5, %f5, %f8                   ! FGM   Group   f5 available
        fmuls   %f7, %f7, %f10                  ! FGM   Group   f7 available
        fadds   %f6, %f8, %f6                   ! FGA   Group   2cyc stall f6,f8 available
        fadds   %f6, %f10, %f6                  ! FGA   Group   4cyc stall f6,f10 available

        /* scale (f6) = 1.0 / sqrt(len) */
        fsqrts  %f6, %f6                        ! FDIV  20 cycles
        fdivs   %f12, %f6, %f6                  ! FDIV  14 cycles

        fmuls   %f3, %f6, %f3
        st      %f3, [%g3 + 0x00]               ! out[i][0] = tx * scale
        fmuls   %f5, %f6, %f5
        st      %f5, [%g3 + 0x04]               ! out[i][1] = ty * scale
        fmuls   %f7, %f6, %f7
        st      %f7, [%g3 + 0x08]               ! out[i][2] = tz * scale

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

        ba      7f
         nop

4:      /* LENGTHS != NULL: fold scale into the diagonal once, up front */
        fmuls   M0, %f15, M0
        fmuls   M5, %f15, M5
        fmuls   M10, %f15, M10

5:
        ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* tx (f3) = (ux * m0)
         * ty (f5) = (uy * m5)
         * tz (f7) = (uz * m10)
         */
        fmuls   %f0, M0, %f3                    ! FGM   Group
        ld      [%o3], %f13                     ! LSU
        fmuls   %f1, M5, %f5                    ! FGM   Group
        add     %o3, 4, %o3                     ! IEU0
        fmuls   %f2, M10, %f7                   ! FGM   Group

        /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

        fmuls   %f3, %f13, %f3
        st      %f3, [%g3 + 0x00]               ! out[i][0] = tx * len
        fmuls   %f5, %f13, %f5
        st      %f5, [%g3 + 0x04]               ! out[i][1] = ty * len
        fmuls   %f7, %f13, %f7
        st      %f7, [%g3 + 0x08]               ! out[i][2] = tz * len

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      5b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop

/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * out[i] = in[i] * diag(m0, m5, m10) * scale, with scale folded into the
 * three matrix elements before the loop.  lengths is not used.
 */
        .globl  _mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */
        sub     %sp, 16, %sp
        st      %o1, [%sp + STACK_VAR_OFF+0x0]
        ld      [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
        add     %sp, 16, %sp

        add     %o0, MATRIX_INV, %o0            ! o0 = mat->inv
        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        LDMATRIX_0_5_10(%o0)

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         clr    %o4                             ! (delay slot) i = 0

        fmuls   M0, %f15, M0
        fmuls   M5, %f15, M5
        fmuls   M10, %f15, M10

1:      ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* tx (f3) = (ux * m0)
         * ty (f5) = (uy * m5)
         * tz (f7) = (uz * m10)
         */
        fmuls   %f0, M0, %f3                    ! FGM   Group
        st      %f3, [%g3 + 0x00]               ! LSU
        fmuls   %f1, M5, %f5                    ! FGM   Group
        st      %f5, [%g3 + 0x04]               ! LSU
        fmuls   %f2, M10, %f7                   ! FGM   Group
        st      %f7, [%g3 + 0x08]               ! LSU

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop

/*
 * _mesa_sparc_transform_rescale_normals
 *
 * out[i] = in[i] transformed by the 3x3 part of mat->inv, with scale
 * folded into all nine matrix elements before the loop.  lengths is not
 * used.
 */
        .globl  _mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */
        sub     %sp, 16, %sp
        st      %o1, [%sp + STACK_VAR_OFF+0x0]
        ld      [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
        add     %sp, 16, %sp

        add     %o0, MATRIX_INV, %o0            ! o0 = mat->inv
        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         clr    %o4                             ! (delay slot) i = 0

        fmuls   M0, %f15, M0
        fmuls   M1, %f15, M1
        fmuls   M2, %f15, M2
        fmuls   M4, %f15, M4
        fmuls   M5, %f15, M5
        fmuls   M6, %f15, M6
        fmuls   M8, %f15, M8
        fmuls   M9, %f15, M9
        fmuls   M10, %f15, M10

1:      ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        fmuls   %f0, M0, %f3                    ! FGM   Group
        fmuls   %f1, M1, %f4                    ! FGM   Group
        fmuls   %f0, M4, %f5                    ! FGM   Group
        fmuls   %f1, M5, %f6                    ! FGM   Group
        fmuls   %f0, M8, %f7                    ! FGM   Group   f3 available
        fmuls   %f1, M9, %f8                    ! FGM   Group   f4 available
        fadds   %f3, %f4, %f3                   ! FGA
        fmuls   %f2, M2, %f10                   ! FGM   Group   f5 available
        fmuls   %f2, M6, %f0                    ! FGM   Group   f6 available
        fadds   %f5, %f6, %f5                   ! FGA
        fmuls   %f2, M10, %f4                   ! FGM   Group   f7 available
        fadds   %f7, %f8, %f7                   ! FGA   Group   f8,f3 available
        fadds   %f3, %f10, %f3                  ! FGA   Group   f10 available
        st      %f3, [%g3 + 0x00]               ! LSU
        fadds   %f5, %f0, %f5                   ! FGA   Group   stall f0,f5 available
        st      %f5, [%g3 + 0x04]               ! LSU
        fadds   %f7, %f4, %f7                   ! FGA   Group   stall f4,f7 available
        st      %f7, [%g3 + 0x08]               ! LSU

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop

/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * out[i] = in[i] * diag(m0, m5, m10) of mat->inv.  scale and lengths are
 * not used.
 */
        .globl  _mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */
        add     %o0, MATRIX_INV, %o0            ! o0 = mat->inv
        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        LDMATRIX_0_5_10(%o0)

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         clr    %o4                             ! (delay slot) i = 0

1:      ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* tx (f3) = (ux * m0)
         * ty (f5) = (uy * m5)
         * tz (f7) = (uz * m10)
         */
        fmuls   %f0, M0, %f3                    ! FGM   Group
        st      %f3, [%g3 + 0x00]               ! LSU
        fmuls   %f1, M5, %f5                    ! FGM   Group
        st      %f5, [%g3 + 0x04]               ! LSU
        fmuls   %f2, M10, %f7                   ! FGM   Group
        st      %f7, [%g3 + 0x08]               ! LSU

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop

/*
 * _mesa_sparc_transform_normals
 *
 * out[i] = in[i] transformed by the 3x3 part of mat->inv.  scale and
 * lengths are not used.
 */
        .globl  _mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */
        add     %o0, MATRIX_INV, %o0            ! o0 = mat->inv
        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         clr    %o4                             ! (delay slot) i = 0

1:      ld      [%o5 + 0x00], %f0               ! ux = from[0]
        ld      [%o5 + 0x04], %f1               ! uy = from[1]
        ld      [%o5 + 0x08], %f2               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        fmuls   %f0, M0, %f3                    ! FGM   Group
        fmuls   %f1, M1, %f4                    ! FGM   Group
        fmuls   %f0, M4, %f5                    ! FGM   Group
        fmuls   %f1, M5, %f6                    ! FGM   Group
        fmuls   %f0, M8, %f7                    ! FGM   Group   f3 available
        fmuls   %f1, M9, %f8                    ! FGM   Group   f4 available
        fadds   %f3, %f4, %f3                   ! FGA
        fmuls   %f2, M2, %f10                   ! FGM   Group   f5 available
        fmuls   %f2, M6, %f0                    ! FGM   Group   f6 available
        fadds   %f5, %f6, %f5                   ! FGA
        fmuls   %f2, M10, %f4                   ! FGM   Group   f7 available
        fadds   %f7, %f8, %f7                   ! FGA   Group   f8,f3 available
        fadds   %f3, %f10, %f3                  ! FGA   Group   f10 available
        st      %f3, [%g3 + 0x00]               ! LSU
        fadds   %f5, %f0, %f5                   ! FGA   Group   stall f0,f5 available
        st      %f5, [%g3 + 0x04]               ! LSU
        fadds   %f7, %f4, %f7                   ! FGA   Group   stall f4,f7 available
        st      %f7, [%g3 + 0x08]               ! LSU

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop

/*
 * _mesa_sparc_normalize_normals
 *
 * No matrix is applied.  When lengths is NULL each input vector is
 * multiplied by 1/sqrt(x*x + y*y + z*z); otherwise it is multiplied by
 * lengths[i].  mat and scale are not used.
 */
        .globl  _mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */

        /* Bounce 1.0f through the stack to reach the FPU. */
        sethi   %hi(ONE_DOT_ZERO), %g2
        sub     %sp, 16, %sp
        st      %g2, [%sp + STACK_VAR_OFF+0x0]
        ld      [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
        add     %sp, 16, %sp

        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         cmp    %o3, 0                          ! (delay slot) lengths == NULL ?
        BNE_PTR 4f
         clr    %o4                             ! (delay slot) i = 0

1:      /* LENGTHS == NULL */
        ld      [%o5 + 0x00], %f3               ! ux = from[0]
        ld      [%o5 + 0x04], %f5               ! uy = from[1]
        ld      [%o5 + 0x08], %f7               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* f3=tx, f5=ty, f7=tz */

        /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
        fmuls   %f3, %f3, %f6                   ! FGM   Group   f3 available
        fmuls   %f5, %f5, %f8                   ! FGM   Group   f5 available
        fmuls   %f7, %f7, %f10                  ! FGM   Group   f7 available
        fadds   %f6, %f8, %f6                   ! FGA   Group   2cyc stall f6,f8 available
        fadds   %f6, %f10, %f6                  ! FGA   Group   4cyc stall f6,f10 available

        /* scale (f6) = 1.0 / sqrt(len) */
        fsqrts  %f6, %f6                        ! FDIV  20 cycles
        fdivs   %f12, %f6, %f6                  ! FDIV  14 cycles

        fmuls   %f3, %f6, %f3
        st      %f3, [%g3 + 0x00]               ! out[i][0] = tx * scale
        fmuls   %f5, %f6, %f5
        st      %f5, [%g3 + 0x04]               ! out[i][1] = ty * scale
        fmuls   %f7, %f6, %f7
        st      %f7, [%g3 + 0x08]               ! out[i][2] = tz * scale

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

        ba      7f
         nop

4:      /* LENGTHS != NULL */

5:
        ld      [%o5 + 0x00], %f3               ! ux = from[0]
        ld      [%o5 + 0x04], %f5               ! uy = from[1]
        ld      [%o5 + 0x08], %f7               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        ld      [%o3], %f13                     ! LSU
        add     %o3, 4, %o3                     ! IEU0

        /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

        fmuls   %f3, %f13, %f3
        st      %f3, [%g3 + 0x00]               ! out[i][0] = tx * len
        fmuls   %f5, %f13, %f5
        st      %f5, [%g3 + 0x04]               ! out[i][1] = ty * len
        fmuls   %f7, %f13, %f7
        st      %f7, [%g3 + 0x08]               ! out[i][2] = tz * len

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      5b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop

/*
 * _mesa_sparc_rescale_normals
 *
 * out[i] = in[i] * scale.  mat and lengths are not used.
 */
        .globl  _mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
        /* o0=mat o1=scale o2=in o3=lengths o4=dest */

        /* Bounce scale through the stack to reach the FPU.  (No 1.0f
         * constant is needed here, so none is materialized.)
         */
        sub     %sp, 16, %sp
        st      %o1, [%sp + STACK_VAR_OFF+0x0]
        ld      [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
        add     %sp, 16, %sp

        LDPTR   [%o2 + V4F_START], %o5          ! o5 = from = in->start
        ld      [%o2 + V4F_COUNT], %g1          ! g1 = in->count
        ld      [%o2 + V4F_STRIDE], %g2         ! g2 = in->stride
        LDPTR   [%o4 + V4F_START], %g3          ! g3 = out = dest->start

        /* dest->count = in->count */
        st      %g1, [%o4 + V4F_COUNT]

        cmp     %g1, 1
        bl      7f                              ! nothing to do if count < 1
         clr    %o4                             ! (delay slot) i = 0

1:
        ld      [%o5 + 0x00], %f3               ! ux = from[0]
        ld      [%o5 + 0x04], %f5               ! uy = from[1]
        ld      [%o5 + 0x08], %f7               ! uz = from[2]
        add     %o5, %g2, %o5                   ! STRIDE_F(from, stride)
        add     %o4, 1, %o4                     ! i++

        /* f3=tx, f5=ty, f7=tz */

        fmuls   %f3, %f15, %f3
        st      %f3, [%g3 + 0x00]               ! out[i][0] = tx * scale
        fmuls   %f5, %f15, %f5
        st      %f5, [%g3 + 0x04]               ! out[i][1] = ty * scale
        fmuls   %f7, %f15, %f7
        st      %f7, [%g3 + 0x08]               ! out[i][2] = tz * scale

        cmp     %o4, %g1                        ! continue if (i < count)
        bl      1b
         add    %g3, 0x10, %g3                  ! advance out vector pointer

7:      retl
         nop