norm.S revision 7117f1b4
/* $Id: norm.S,v 1.1.1.1 2008/07/29 05:10:21 mrg Exp $ */

#include "sparc_matrix.h"

#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
	/* Solaris requires this for 64-bit. */
	.register	%g2, #scratch
	.register	%g3, #scratch
#endif

	.text

#ifdef __arch64__
#define STACK_VAR_OFF	(2047 + (8 * 16))
#else
#define STACK_VAR_OFF	(4 * 16)
#endif

	/* Newton-Raphson approximation turns out to be slower
	 * (and less accurate) than direct fsqrts/fdivs.
	 */
#define ONE_DOT_ZERO	0x3f800000

	.globl	_mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	cmp	%o3, 0
	bne	4f
	clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM Group
	fmuls	%f1, M1, %f4			! FGM Group
	fmuls	%f0, M4, %f5			! FGM Group
	fmuls	%f1, M5, %f6			! FGM Group
	fmuls	%f0, M8, %f7			! FGM Group f3 available
	fmuls	%f1, M9, %f8			! FGM Group f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM Group f5 available
	fmuls	%f2, M6, %f0			! FGM Group f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM Group f7 available
	fadds	%f7, %f8, %f7			! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3			! FGA Group f10 available
	fadds	%f5, %f0, %f5			! FGA Group stall f0,f5 available
	fadds	%f7, %f4, %f7			! FGA Group stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM Group f3 available
	fmuls	%f5, %f5, %f8			! FGM Group f5 available
	fmuls	%f7, %f7, %f10			! FGM Group f7 available
	fadds	%f6, %f8, %f6			! FGA Group 2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA Group 4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV 20 cycles
	fdivs	%f12, %f6, %f6			! FDIV 14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	nop

4:	/* LENGTHS != NULL */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM Group
	fmuls	%f1, M1, %f4			! FGM Group
	fmuls	%f0, M4, %f5			! FGM Group
	fmuls	%f1, M5, %f6			! FGM Group
	fmuls	%f0, M8, %f7			! FGM Group f3 available
	fmuls	%f1, M9, %f8			! FGM Group f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM Group f5 available
	fmuls	%f2, M6, %f0			! FGM Group f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM Group f7 available
	fadds	%f7, %f8, %f7			! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3			! FGA Group f10 available
	ld	[%o3], %f13			! LSU
	fadds	%f5, %f0, %f5			! FGA Group stall f0,f5 available
	add	%o3, 4, %o3			! IEU0
	fadds	%f7, %f4, %f7			! FGA Group stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop
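
	/* For reference, the LENGTHS == NULL path above computes roughly the
	 * following C (a sketch modeled on the instruction comments above;
	 * "m0".."m10" stand for the mat->inv entries loaded by
	 * LDMATRIX_0_1_2_4_5_6_8_9_10, and the loop/stride handling is
	 * paraphrased rather than taken verbatim from Mesa's C sources):
	 *
	 *   for (i = 0; i < in->count; i++) {
	 *       const float ux = from[0], uy = from[1], uz = from[2];
	 *       float tx  = ux * m0 + uy * m1 + uz * m2;
	 *       float ty  = ux * m4 + uy * m5 + uz * m6;
	 *       float tz  = ux * m8 + uy * m9 + uz * m10;
	 *       float len = tx * tx + ty * ty + tz * tz;
	 *       float s   = 1.0f / sqrtf(len);
	 *       out[i][0] = tx * s;
	 *       out[i][1] = ty * s;
	 *       out[i][2] = tz * s;
	 *       from = (const float *) ((const char *) from + stride);
	 *   }
	 *
	 * The LENGTHS != NULL path instead folds the uniform scale into the
	 * nine matrix entries once, then multiplies each transformed normal
	 * by the precomputed lengths[i], avoiding the per-vertex sqrt/divide.
	 */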

	.globl	_mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	cmp	%o3, 0
	bne	4f
	clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM Group
	fmuls	%f1, M5, %f5			! FGM Group
	fmuls	%f2, M10, %f7			! FGM Group

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM Group stall, f3 available
	fmuls	%f5, %f5, %f8			! FGM Group f5 available
	fmuls	%f7, %f7, %f10			! FGM Group f7 available
	fadds	%f6, %f8, %f6			! FGA Group 2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA Group 4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV 20 cycles
	fdivs	%f12, %f6, %f6			! FDIV 14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	nop

4:	/* LENGTHS != NULL */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM Group
	ld	[%o3], %f13			! LSU
	fmuls	%f1, M5, %f5			! FGM Group
	add	%o3, 4, %o3			! IEU0
	fmuls	%f2, M10, %f7			! FGM Group

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop

	.globl	_mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	clr	%o4				! 'i' for STRIDE_LOOP

	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop
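
	/* The two *_no_rot routines above assume the upper-left 3x3 of
	 * mat->inv is diagonal, so each component needs a single multiply
	 * (a sketch, with the same caveats as the comment after the first
	 * routine):
	 *
	 *   tx = ux * m0;   ty = uy * m5;   tz = uz * m10;
	 *
	 * transform_normalize_normals_no_rot then normalizes (or multiplies
	 * by lengths[i]) exactly as in the full-matrix routine, while
	 * transform_rescale_normals_no_rot just folds the uniform scale into
	 * m0/m5/m10 once before its loop.
	 */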

	.globl	_mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	clr	%o4				! 'i' for STRIDE_LOOP

	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	fmuls	%f0, M0, %f3			! FGM Group
	fmuls	%f1, M1, %f4			! FGM Group
	fmuls	%f0, M4, %f5			! FGM Group
	fmuls	%f1, M5, %f6			! FGM Group
	fmuls	%f0, M8, %f7			! FGM Group f3 available
	fmuls	%f1, M9, %f8			! FGM Group f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM Group f5 available
	fmuls	%f2, M6, %f0			! FGM Group f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM Group f7 available
	fadds	%f7, %f8, %f7			! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3			! FGA Group f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA Group stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA Group stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop
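
	/* _mesa_sparc_transform_rescale_normals above needs no per-vertex
	 * sqrt or divide: the uniform scale is folded into all nine matrix
	 * entries once before the loop, so the loop body is just the 3x3
	 * transform (a sketch, same caveats as the earlier comments):
	 *
	 *   m0 *= scale; m1 *= scale; ... m10 *= scale;   // once, up front
	 *   out[i][0] = ux * m0 + uy * m1 + uz * m2;
	 *   out[i][1] = ux * m4 + uy * m5 + uz * m6;
	 *   out[i][2] = ux * m8 + uy * m9 + uz * m10;
	 */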

	.globl	_mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_5_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop

	.globl	_mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	fmuls	%f0, M0, %f3			! FGM Group
	fmuls	%f1, M1, %f4			! FGM Group
	fmuls	%f0, M4, %f5			! FGM Group
	fmuls	%f1, M5, %f6			! FGM Group
	fmuls	%f0, M8, %f7			! FGM Group f3 available
	fmuls	%f1, M9, %f8			! FGM Group f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM Group f5 available
	fmuls	%f2, M6, %f0			! FGM Group f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM Group f7 available
	fadds	%f7, %f8, %f7			! FGA Group f8,f3 available
	fadds	%f3, %f10, %f3			! FGA Group f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA Group stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA Group stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop

	.globl	_mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	cmp	%o3, 0
	bne	4f
	clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM Group f3 available
	fmuls	%f5, %f5, %f8			! FGM Group f5 available
	fmuls	%f7, %f7, %f10			! FGM Group f7 available
	fadds	%f6, %f8, %f6			! FGA Group 2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA Group 4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV 20 cycles
	fdivs	%f12, %f6, %f6			! FDIV 14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	nop

4:	/* LENGTHS != NULL */

5:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	ld	[%o3], %f13			! LSU
	add	%o3, 4, %o3			! IEU0

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop
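
	/* _mesa_sparc_normalize_normals above does not touch the matrix at
	 * all: with lengths == NULL each input normal is scaled by
	 * 1/sqrt(x*x + y*y + z*z), otherwise by the caller-supplied
	 * lengths[i] (a sketch, same caveats as the earlier comments):
	 *
	 *   float s = lengths ? lengths[i]
	 *                     : 1.0f / sqrtf(x*x + y*y + z*z);
	 *   out[i][0] = x * s;
	 *   out[i][1] = y * s;
	 *   out[i][2] = z * s;
	 */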

	.globl	_mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f
	clr	%o4				! 'i' for STRIDE_LOOP

1:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	nop