xform.S revision 7117f1b4
/* $Id: xform.S,v 1.1.1.1 2008/07/29 05:10:21 mrg Exp $ */

/* TODO
 *
 * 1) It would be nice if load/store double could be used
 *    at least for the matrix parts.  I think for the matrices
 *    it is safe, but for the vertices it probably is not due to
 *    things like glInterleavedArrays etc.
 *
 *    UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
 *
 * 2) One extremely slick trick would be if we could enclose
 *    groups of xform calls on the same vertices such that
 *    we just load the matrix into f16-->f31 before the calls
 *    and then we would not have to do them here.  This may be
 *    tricky and not much of a gain though.
 */

#include "sparc_matrix.h"

#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
	/* Solaris requires this for 64-bit.  */
	.register %g2, #scratch
	.register %g3, #scratch
#endif

	.text
	.align 64

/*
 * Register usage shared by the transform routines in this file, as
 * established by the loads and stores below:
 *
 *   %o0  destination vector descriptor; V4F_START, V4F_COUNT, V4F_SIZE
 *        and V4F_FLAGS are accessed through it
 *   %o1  matrix base handed to the LDMATRIX_* macros (sparc_matrix.h);
 *        afterwards reused as the element counter, except where noted
 *   %o2  source vector descriptor; V4F_START, V4F_STRIDE and V4F_COUNT
 *        are read through it (reused as the even loop bound in the
 *        two-at-a-time routines)
 *   %o5  source stride, added to %g1 after each element is read
 *   %g1  running source pointer
 *   %g2  running destination pointer, advanced by 0x10 per element
 *   %g3  element count, copied to the destination descriptor
 *
 * M0..M15 are the names the LDMATRIX_* macros load; in the formulas
 * below x, y, z, w are the source words at offsets 0x0, 0x4, 0x8, 0xc.
 * NOTE(review): the exact M-to-register mapping lives in sparc_matrix.h
 * and is not visible here.
 *
 * All routines are leaf code: they finish by branching to one of the
 * __set_v4f_N helpers, which return to the original caller via retl.
 */

/*
 * __set_v4f_N: store N into V4F_SIZE of the descriptor at %o0, OR
 * VEC_SIZE_N into its V4F_FLAGS word, and return with retl (the flag
 * store sits in the delay slot).  Uses %g1 and %g2 as scratch.
 */
__set_v4f_1:
	ld	[%o0 + V4F_FLAGS], %g2
	mov	1, %g1
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_1, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]
__set_v4f_2:
	ld	[%o0 + V4F_FLAGS], %g2
	mov	2, %g1
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_2, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]
__set_v4f_3:
	ld	[%o0 + V4F_FLAGS], %g2
	mov	3, %g1
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_3, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]
__set_v4f_4:
	ld	[%o0 + V4F_FLAGS], %g2
	mov	4, %g1
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_4, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]

	/* First the raw versions. */

/*
 * points1_general: out = (x*M0 + M12, x*M1 + M13, x*M2 + M14, x*M3 + M15).
 * Two elements per loop pass: a count below 1 skips straight to 3:, a
 * count of exactly 1 goes to the single-element tail at 2:, otherwise
 * the loop at 1: runs up to (count & ~1) and the tail handles an odd
 * leftover.  Result size is set to 4.
 */
	.globl	_mesa_sparc_transform_points1_general
_mesa_sparc_transform_points1_general:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_3_12_13_14_15(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	ld	[%g1 + 0x00], %f8	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f1		! FGM Group 1-cycle stall on %f0
	fmuls	%f0, M1, %f2		! FGM Group
	fmuls	%f0, M2, %f3		! FGM Group
	fmuls	%f0, M3, %f4		! FGM Group
	fmuls	%f8, M0, %f9		! FGM Group f1 available
	fadds	%f1, M12, %f1		! FGA
	st	%f1, [%g2 + 0x00]	! LSU
	fmuls	%f8, M1, %f10		! FGM Group f2 available
	fadds	%f2, M13, %f2		! FGA
	st	%f2, [%g2 + 0x04]	! LSU
	fmuls	%f8, M2, %f11		! FGM Group f3 available
	fadds	%f3, M14, %f3		! FGA
	st	%f3, [%g2 + 0x08]	! LSU
	fmuls	%f8, M3, %f12		! FGM Group f4 available
	fadds	%f4, M15, %f4		! FGA
	st	%f4, [%g2 + 0x0c]	! LSU
	fadds	%f9, M12, %f9		! FGA Group f9 available
	st	%f9, [%g2 + 0x10]	! LSU
	fadds	%f10, M13, %f10		! FGA Group f10 available
	st	%f10, [%g2 + 0x14]	! LSU
	fadds	%f11, M14, %f11		! FGA Group f11 available
	st	%f11, [%g2 + 0x18]	! LSU
	fadds	%f12, M15, %f12		! FGA Group f12 available
	st	%f12, [%g2 + 0x1c]	! LSU
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0	! LSU Group
	fmuls	%f0, M0, %f1		! FGM Group 1-cycle stall on %f0
	fmuls	%f0, M1, %f2		! FGM Group
	fmuls	%f0, M2, %f3		! FGM Group
	fmuls	%f0, M3, %f4		! FGM Group
	fadds	%f1, M12, %f1		! FGA Group
	st	%f1, [%g2 + 0x00]	! LSU
	fadds	%f2, M13, %f2		! FGA Group
	st	%f2, [%g2 + 0x04]	! LSU
	fadds	%f3, M14, %f3		! FGA Group
	st	%f3, [%g2 + 0x08]	! LSU
	fadds	%f4, M15, %f4		! FGA Group
	st	%f4, [%g2 + 0x0c]	! LSU

3:
	ba	__set_v4f_4
	 nop

/*
 * points1_identity: copy x to the first word of each output element.
 * Returns at 4: without touching anything when %o0 == %o2.  Same
 * two-at-a-time loop shape as points1_general.  Result size is 1.
 */
	.globl	_mesa_sparc_transform_points1_identity
_mesa_sparc_transform_points1_identity:
	cmp	%o0, %o2
	be	4f
	 ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	ld	[%g1 + 0x00], %f1	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	st	%f0, [%g2 + 0x00]	! LSU Group
	cmp	%o1, %o2		! IEU1
	st	%f1, [%g2 + 0x10]	! LSU Group
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0
	addx	%g0, %g0, %g0		! writes only %g0; acts as filler between load and store
	st	%f0, [%g2 + 0x00]

3:
	ba	__set_v4f_1
	 nop

4:	retl
	 nop

/*
 * points1_2d: out = (x*M0 + M12, x*M1 + M13).  Two elements per loop
 * pass with a single-element tail.  Result size is 2.
 */
	.globl	_mesa_sparc_transform_points1_2d
_mesa_sparc_transform_points1_2d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	ld	[%g1 + 0x00], %f8	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f1		! FGM Group
	fmuls	%f0, M1, %f2		! FGM Group
	fmuls	%f8, M0, %f9		! FGM Group
	fmuls	%f8, M1, %f10		! FGM Group
	fadds	%f1, M12, %f3		! FGA Group f1 available
	st	%f3, [%g2 + 0x00]	! LSU
	fadds	%f2, M13, %f4		! FGA Group f2 available
	st	%f4, [%g2 + 0x04]	! LSU
	fadds	%f9, M12, %f11		! FGA Group f9 available
	st	%f11, [%g2 + 0x10]	! LSU
	fadds	%f10, M13, %f12		! FGA Group f10 available
	st	%f12, [%g2 + 0x14]	! LSU
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fadds	%f1, M12, %f3
	st	%f3, [%g2 + 0x00]
	fadds	%f2, M13, %f4
	st	%f4, [%g2 + 0x04]

3:
	ba	__set_v4f_2
	 nop

/*
 * points1_2d_no_rot: out = (x*M0 + M12, M13).  Two elements per loop
 * pass with a single-element tail.  Result size is 2.
 */
	.globl	_mesa_sparc_transform_points1_2d_no_rot
_mesa_sparc_transform_points1_2d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	ld	[%g1 + 0x00], %f4	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f1		! FGM Group
	fmuls	%f4, M0, %f5		! FGM Group
	fadds	%f1, M12, %f3		! FGA Group, 2 cycle stall, f1 available
	st	%f3, [%g2 + 0x00]	! LSU
	st	M13, [%g2 + 0x04]	! LSU Group, f5 available
	fadds	%f5, M12, %f6		! FGA
	st	%f6, [%g2 + 0x10]	! LSU Group
	st	M13, [%g2 + 0x14]	! LSU Group
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0
	fmuls	%f0, M0, %f1
	fadds	%f1, M12, %f3
	st	%f3, [%g2 + 0x00]
	st	M13, [%g2 + 0x04]

3:
	ba	__set_v4f_2
	 nop

/*
 * points1_3d: out = (x*M0 + M12, x*M1 + M13, x*M2 + M14).  Two elements
 * per loop pass with a single-element tail.  Result size is 3.
 */
	.globl	_mesa_sparc_transform_points1_3d
_mesa_sparc_transform_points1_3d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	ld	[%g1 + 0x00], %f4	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f1		! FGM Group
	fmuls	%f0, M1, %f2		! FGM Group
	fmuls	%f0, M2, %f3		! FGM Group
	fmuls	%f4, M0, %f5		! FGM Group
	fadds	%f1, M12, %f1		! FGA Group, f1 available
	st	%f1, [%g2 + 0x00]	! LSU
	fmuls	%f4, M1, %f6		! FGM
	fadds	%f2, M13, %f2		! FGA Group, f2 available
	st	%f2, [%g2 + 0x04]	! LSU
	fmuls	%f4, M2, %f7		! FGM
	fadds	%f3, M14, %f3		! FGA Group, f3 available
	st	%f3, [%g2 + 0x08]	! LSU
	fadds	%f5, M12, %f5		! FGA Group, f5 available
	st	%f5, [%g2 + 0x10]	! LSU
	fadds	%f6, M13, %f6		! FGA Group, f6 available
	st	%f6, [%g2 + 0x14]	! LSU
	fadds	%f7, M14, %f7		! FGA Group, f7 available
	st	%f7, [%g2 + 0x18]	! LSU
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0
	fmuls	%f0, M0, %f1
	fmuls	%f0, M1, %f2
	fmuls	%f0, M2, %f3
	fadds	%f1, M12, %f1
	st	%f1, [%g2 + 0x00]
	fadds	%f2, M13, %f2
	st	%f2, [%g2 + 0x04]
	fadds	%f3, M14, %f3
	st	%f3, [%g2 + 0x08]

3:
	ba	__set_v4f_3
	 nop

/*
 * points1_3d_no_rot: out = (x*M0 + M12, M13, M14).  Two elements per
 * loop pass with a single-element tail.  Result size is 3.
 */
	.globl	_mesa_sparc_transform_points1_3d_no_rot
_mesa_sparc_transform_points1_3d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	ld	[%g1 + 0x00], %f2	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f1		! FGM Group
	fmuls	%f2, M0, %f3		! FGM Group
	fadds	%f1, M12, %f1		! FGA Group, 2 cycle stall, f1 available
	st	%f1, [%g2 + 0x00]	! LSU
	fadds	%f3, M12, %f3		! FGA Group, f3 available
	st	M13, [%g2 + 0x04]	! LSU
	st	M14, [%g2 + 0x08]	! LSU Group
	st	%f3, [%g2 + 0x10]	! LSU Group
	st	M13, [%g2 + 0x14]	! LSU Group
	st	M14, [%g2 + 0x18]	! LSU Group
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0
	fmuls	%f0, M0, %f1
	fadds	%f1, M12, %f1
	st	%f1, [%g2 + 0x00]
	st	M13, [%g2 + 0x04]
	st	M14, [%g2 + 0x08]

3:
	ba	__set_v4f_3
	 nop

/*
 * points1_perspective: out = (x*M0, 0, M14, 0); the zero words are
 * stored from %g0.  Two elements per loop pass with a single-element
 * tail.  Result size is 4.
 */
	.globl	_mesa_sparc_transform_points1_perspective
_mesa_sparc_transform_points1_perspective:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	ld	[%g1 + 0x00], %f2	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f1		! FGM Group
	st	%f1, [%g2 + 0x00]	! LSU
	fmuls	%f2, M0, %f3		! FGM Group
	st	%g0, [%g2 + 0x04]	! LSU
	st	M14, [%g2 + 0x08]	! LSU Group
	st	%g0, [%g2 + 0x0c]	! LSU Group
	st	%f3, [%g2 + 0x10]	! LSU Group
	st	%g0, [%g2 + 0x14]	! LSU Group
	st	M14, [%g2 + 0x18]	! LSU Group
	st	%g0, [%g2 + 0x1c]	! LSU Group
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0
	fmuls	%f0, M0, %f1
	st	%f1, [%g2 + 0x00]
	st	%g0, [%g2 + 0x04]
	st	M14, [%g2 + 0x08]
	st	%g0, [%g2 + 0x0c]

3:
	ba	__set_v4f_4
	 nop

/*
 * points2_general:
 *   out = (x*M0 + y*M4 + M12, x*M1 + y*M5 + M13,
 *          x*M2 + y*M6 + M14, x*M3 + y*M7 + M15).
 * One element per loop pass; a zero count skips the loop.  Result
 * size is 4.
 */
	.globl	_mesa_sparc_transform_points2_general
_mesa_sparc_transform_points2_general:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f2		! FGM Group
	fmuls	%f0, M1, %f3		! FGM Group
	fmuls	%f0, M2, %f4		! FGM Group
	fmuls	%f0, M3, %f5		! FGM Group
	fadds	%f2, M12, %f2		! FGA Group f2 available
	fmuls	%f1, M4, %f6		! FGM
	fadds	%f3, M13, %f3		! FGA Group f3 available
	fmuls	%f1, M5, %f7		! FGM
	fadds	%f4, M14, %f4		! FGA Group f4 available
	fmuls	%f1, M6, %f8		! FGM
	fadds	%f5, M15, %f5		! FGA Group f5 available
	fmuls	%f1, M7, %f9		! FGM
	fadds	%f2, %f6, %f2		! FGA Group f6 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, %f7, %f3		! FGA Group f7 available
	st	%f3, [%g2 + 0x04]	! LSU
	fadds	%f4, %f8, %f4		! FGA Group f8 available
	st	%f4, [%g2 + 0x08]	! LSU
	fadds	%f5, %f9, %f5		! FGA Group f9 available
	st	%f5, [%g2 + 0x0c]	! LSU
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_4
	 nop

/*
 * points2_identity: copy (x, y) to each output element.  Returns at 3:
 * without touching anything when %o2 == %o0.  One element per loop
 * pass.  Result size is 2.
 */
	.globl	_mesa_sparc_transform_points2_identity
_mesa_sparc_transform_points2_identity:
	cmp	%o2, %o0
	be	3f
	 ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	add	%o1, 1, %o1		! IEU0
	ld	[%g1 + 0x04], %f1	! LSU Group
	add	%g1, %o5, %g1		! IEU0
	cmp	%o1, %g3		! IEU1
	st	%f0, [%g2 + 0x00]	! LSU Group
	st	%f1, [%g2 + 0x04]	! LSU Group
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0
2:
	ba	__set_v4f_2
	 nop

3:	retl
	 nop

/*
 * points2_2d: out = (x*M0 + y*M4 + M12, x*M1 + y*M5 + M13).  Two
 * elements per loop pass with a single-element tail.  Result size is 2.
 */
	.globl	_mesa_sparc_transform_points2_2d
_mesa_sparc_transform_points2_2d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_4_5_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f2		! FGM
	ld	[%g1 + 0x00], %f8	! LSU Group
	fmuls	%f0, M1, %f3		! FGM
	ld	[%g1 + 0x04], %f9	! LSU Group
	fmuls	%f1, M4, %f6		! FGM
	fmuls	%f1, M5, %f7		! FGM Group
	add	%g1, %o5, %g1		! IEU0
	fmuls	%f8, M0, %f10		! FGM Group f2 available
	fadds	%f2, M12, %f2		! FGA
	fmuls	%f8, M1, %f11		! FGM Group f3 available
	fadds	%f3, M13, %f3		! FGA
	fmuls	%f9, M4, %f12		! FGM Group
	fmuls	%f9, M5, %f13		! FGM Group
	fadds	%f10, M12, %f10		! FGA Group f2, f10 available
	fadds	%f2, %f6, %f2		! FGA Group f3, f11 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f11, M13, %f11		! FGA Group f12 available
	fadds	%f3, %f7, %f3		! FGA Group f13 available
	st	%f3, [%g2 + 0x04]	! LSU
	fadds	%f10, %f12, %f10	! FGA Group f10 available
	st	%f10, [%g2 + 0x10]	! LSU
	fadds	%f11, %f13, %f11	! FGA Group f11 available
	st	%f11, [%g2 + 0x14]	! LSU
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	fmuls	%f0, M0, %f2		! FGM Group
	fmuls	%f0, M1, %f3		! FGM Group
	fmuls	%f1, M4, %f6		! FGM Group
	fmuls	%f1, M5, %f7		! FGM Group
	fadds	%f2, M12, %f2		! FGA Group f2 available
	fadds	%f3, M13, %f3		! FGA Group f3 available
	fadds	%f2, %f6, %f2		! FGA Group 2 cycle stall, f2 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, %f7, %f3		! FGA Group f3 available
	st	%f3, [%g2 + 0x04]	! LSU

3:
	ba	__set_v4f_2
	 nop

/*
 * points2_2d_no_rot: out = (x*M0 + M12, y*M5 + M13).  Two elements per
 * loop pass with a single-element tail.  Result size is 2.
 */
	.globl	_mesa_sparc_transform_points2_2d_no_rot
_mesa_sparc_transform_points2_2d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_5_12_13(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	ld	[%g1 + 0x00], %f4	! LSU Group
	fmuls	%f0, M0, %f2		! FGM
	ld	[%g1 + 0x04], %f5	! LSU Group
	fmuls	%f1, M5, %f3		! FGM
	fmuls	%f4, M0, %f6		! FGM Group
	add	%g1, %o5, %g1		! IEU0
	fmuls	%f5, M5, %f7		! FGM Group
	fadds	%f2, M12, %f2		! FGA Group f2 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, M13, %f3		! FGA Group f3 available
	st	%f3, [%g2 + 0x04]	! LSU
	fadds	%f6, M12, %f6		! FGA Group f6 available
	st	%f6, [%g2 + 0x10]	! LSU
	fadds	%f7, M13, %f7		! FGA Group f7 available
	st	%f7, [%g2 + 0x14]	! LSU
	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	fmuls	%f0, M0, %f2		! FGM Group
	fmuls	%f1, M5, %f3		! FGM Group
	fadds	%f2, M12, %f2		! FGA Group, 2 cycle stall, f2 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, M13, %f3		! FGA Group f3 available
	st	%f3, [%g2 + 0x04]	! LSU

3:
	ba	__set_v4f_2
	 nop

/*
 * points2_3d:
 *   out = (x*M0 + y*M4 + M12, x*M1 + y*M5 + M13, x*M2 + y*M6 + M14).
 * Two elements per loop pass with a single-element tail.  Result size
 * is 3.
 *
 * The two V4F_START pointers are fetched with LDPTR, exactly as in
 * every other routine in this file.  A plain 32-bit ld here would read
 * only half of a 64-bit pointer.
 */
	/* orig: 12 cycles */
	.globl	_mesa_sparc_transform_points2_3d
_mesa_sparc_transform_points2_3d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o1

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	add	%o1, 2, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	ld	[%g1 + 0x00], %f9	! LSU Group
	fmuls	%f0, M0, %f2		! FGM
	ld	[%g1 + 0x04], %f10	! LSU Group
	fmuls	%f0, M1, %f3		! FGM
	fmuls	%f0, M2, %f4		! FGM Group
	add	%g1, %o5, %g1		! IEU0
	fmuls	%f1, M4, %f6		! FGM Group
	fmuls	%f1, M5, %f7		! FGM Group f2 available
	fadds	%f2, M12, %f2		! FGA
	fmuls	%f1, M6, %f8		! FGM Group f3 available
	fadds	%f3, M13, %f3		! FGA
	fmuls	%f9, M0, %f11		! FGM Group f4 available
	fadds	%f4, M14, %f4		! FGA
	fmuls	%f9, M1, %f12		! FGM Group f6 available
	fmuls	%f9, M2, %f13		! FGM Group f2, f7 available
	fadds	%f2, %f6, %f2		! FGA
	st	%f2, [%g2 + 0x00]	! LSU
	fmuls	%f10, M4, %f14		! FGM Group f3, f8 available
	fadds	%f3, %f7, %f3		! FGA
	st	%f3, [%g2 + 0x04]	! LSU
	fmuls	%f10, M5, %f15		! FGM Group f4, f11 available
	fadds	%f11, M12, %f11		! FGA
	fmuls	%f10, M6, %f0		! FGM Group f12 available
	fadds	%f12, M13, %f12		! FGA
	fadds	%f13, M14, %f13		! FGA Group f13 available
	fadds	%f4, %f8, %f4		! FGA Group f14 available
	st	%f4, [%g2 + 0x08]	! LSU
	fadds	%f11, %f14, %f11	! FGA Group f15, f11 available
	st	%f11, [%g2 + 0x10]	! LSU
	fadds	%f12, %f15, %f12	! FGA Group f0, f12 available
	st	%f12, [%g2 + 0x14]	! LSU
	fadds	%f13, %f0, %f13		! FGA Group f13 available
	st	%f13, [%g2 + 0x18]	! LSU

	cmp	%o1, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o1, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	fmuls	%f0, M0, %f2		! FGM Group
	fmuls	%f0, M1, %f3		! FGM Group
	fmuls	%f0, M2, %f4		! FGM Group
	fmuls	%f1, M4, %f6		! FGM Group
	fmuls	%f1, M5, %f7		! FGM Group f2 available
	fadds	%f2, M12, %f2		! FGA
	fmuls	%f1, M6, %f8		! FGM Group f3 available
	fadds	%f3, M13, %f3		! FGA
	fadds	%f4, M14, %f4		! FGA Group f4 available
	fadds	%f2, %f6, %f2		! FGA Group stall, f2, f6, f7 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, %f7, %f3		! FGA Group f3, f8 available
	st	%f3, [%g2 + 0x04]	! LSU
	fadds	%f4, %f8, %f4		! FGA Group f4 available
	st	%f4, [%g2 + 0x08]	! LSU

3:
	ba	__set_v4f_3
	 nop

/*
 * points2_3d_no_rot: out = (x*M0 + M12, y*M5 + M13, M14).  Two elements
 * per loop pass with a single-element tail.
 *
 * The element counter lives in %o3 here so that %o1 still holds the
 * matrix base at 3:, where the word at offset 14 * 4 is reloaded as an
 * integer: a nonzero word selects size 3, a zero word selects size 2.
 */
	.globl	_mesa_sparc_transform_points2_3d_no_rot
_mesa_sparc_transform_points2_3d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_5_12_13_14(%o1)

	cmp	%g3, 1
	st	%g3, [%o0 + V4F_COUNT]
	bl	3f
	 clr	%o3

	be	2f
	 andn	%g3, 1, %o2

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	add	%o3, 2, %o3		! IEU0
	add	%g1, %o5, %g1		! IEU1
	ld	[%g1 + 0x00], %f4	! LSU Group
	fmuls	%f0, M0, %f2		! FGM
	ld	[%g1 + 0x04], %f5	! LSU Group
	fmuls	%f1, M5, %f3		! FGM
	fmuls	%f4, M0, %f6		! FGM Group
	add	%g1, %o5, %g1		! IEU0
	fmuls	%f5, M5, %f7		! FGM Group
	fadds	%f2, M12, %f2		! FGA Group f2 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, M13, %f3		! FGA Group f3 available
	st	%f3, [%g2 + 0x04]	! LSU
	fadds	%f6, M12, %f6		! FGA Group f6 available
	st	M14, [%g2 + 0x08]	! LSU
	fadds	%f7, M13, %f7		! FGA Group f7 available
	st	%f6, [%g2 + 0x10]	! LSU
	st	%f7, [%g2 + 0x14]	! LSU Group
	st	M14, [%g2 + 0x18]	! LSU Group
	cmp	%o3, %o2		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x20, %g2		! IEU0 Group

	cmp	%o3, %g3
	be	3f
	 nop

2:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	fmuls	%f0, M0, %f2		! FGM Group
	fmuls	%f1, M5, %f3		! FGM Group
	fadds	%f2, M12, %f2		! FGA Group, 2 cycle stall, f2 available
	st	%f2, [%g2 + 0x00]	! LSU
	fadds	%f3, M13, %f3		! FGA Group f3 available
	st	%f3, [%g2 + 0x04]	! LSU
	st	M14, [%g2 + 0x08]	! LSU Group

3:	ld	[%o1 + (14 * 0x4)], %g3
	cmp	%g3, 0
	bne	__set_v4f_3
	 nop
	ba	__set_v4f_2
	 nop

/*
 * points2_perspective: out = (x*M0, y*M5, M14, 0); the zero word is
 * stored from %g0.  One element per loop pass.  Result size is 4.
 */
	.globl	_mesa_sparc_transform_points2_perspective
_mesa_sparc_transform_points2_perspective:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_5_14(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0
	ld	[%g1 + 0x04], %f1
	add	%o1, 1, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f2
	st	%f2, [%g2 + 0x00]
	fmuls	%f1, M5, %f3
	st	%f3, [%g2 + 0x04]
	st	M14, [%g2 + 0x08]
	st	%g0, [%g2 + 0x0c]
	cmp	%o1, %g3
	bne	1b
	 add	%g2, 0x10, %g2
2:
	ba	__set_v4f_4
	 nop

/*
 * points3_general:
 *   out[i] = x*M(i) + y*M(4+i) + z*M(8+i) + M(12+i)   for i = 0..3.
 * One element per loop pass.  Result size is 4.
 */
	.globl	_mesa_sparc_transform_points3_general
_mesa_sparc_transform_points3_general:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f3		! FGM
	fmuls	%f1, M4, %f7		! FGM Group
	fmuls	%f0, M1, %f4		! FGM Group
	fmuls	%f1, M5, %f8		! FGM Group
	fmuls	%f0, M2, %f5		! FGM Group f3 available
	fmuls	%f1, M6, %f9		! FGM Group f7 available
	fadds	%f3, %f7, %f3		! FGA
	fmuls	%f0, M3, %f6		! FGM Group f4 available
	fmuls	%f1, M7, %f10		! FGM Group f8 available
	fadds	%f4, %f8, %f4		! FGA
	fmuls	%f2, M8, %f7		! FGM Group f5 available
	fmuls	%f2, M9, %f8		! FGM Group f9,f3 available
	fadds	%f5, %f9, %f5		! FGA
	fmuls	%f2, M10, %f9		! FGM Group f6 available
	fadds	%f6, %f10, %f6		! FGA Group f10,f4 available
	fmuls	%f2, M11, %f10		! FGM
	fadds	%f3, M12, %f3		! FGA Group f7 available
	fadds	%f4, M13, %f4		! FGA Group f8,f5 available
	fadds	%f5, M14, %f5		! FGA Group f9 available
	fadds	%f6, M15, %f6		! FGA Group f10,f6 available
	fadds	%f3, %f7, %f3		! FGA Group f3 available
	st	%f3, [%g2 + 0x00]	! LSU
	fadds	%f4, %f8, %f4		! FGA Group f4 available
	st	%f4, [%g2 + 0x04]	! LSU
	fadds	%f5, %f9, %f5		! FGA Group f5 available
	st	%f5, [%g2 + 0x08]	! LSU
	fadds	%f6, %f10, %f6		! FGA Group f6 available
	st	%f6, [%g2 + 0x0c]	! LSU
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_4
	 nop

/*
 * points3_identity: copy (x, y, z) to each output element.  One element
 * per loop pass.  Result size is 3.  Unlike the points1/points2
 * identity routines there is no early exit for %o0 == %o2.
 */
	.globl	_mesa_sparc_transform_points3_identity
_mesa_sparc_transform_points3_identity:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0
	ld	[%g1 + 0x04], %f1
	ld	[%g1 + 0x08], %f2
	add	%o1, 1, %o1
	add	%g1, %o5, %g1
	cmp	%o1, %g3
	st	%f0, [%g2 + 0x00]
	st	%f1, [%g2 + 0x04]
	st	%f2, [%g2 + 0x08]
	bne	1b
	 add	%g2, 0x10, %g2
2:
	ba	__set_v4f_3
	 nop

/*
 * points3_2d: out = (x*M0 + y*M4 + M12, x*M1 + y*M5 + M13, z).  One
 * element per loop pass.  Result size is 3.
 */
	.globl	_mesa_sparc_transform_points3_2d
_mesa_sparc_transform_points3_2d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_4_5_12_13(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f3		! FGM
	fmuls	%f0, M1, %f4		! FGM Group
	fmuls	%f1, M4, %f6		! FGM Group
	fmuls	%f1, M5, %f7		! FGM Group
	fadds	%f3, M12, %f3		! FGA Group f3 available
	fadds	%f4, M13, %f4		! FGA Group f4 available
	fadds	%f3, %f6, %f3		! FGA Group f6 available
	st	%f3, [%g2 + 0x00]	! LSU
	fadds	%f4, %f7, %f4		! FGA Group f7 available
	st	%f4, [%g2 + 0x04]	! LSU
	st	%f2, [%g2 + 0x08]	! LSU Group
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_3
	 nop

/*
 * points3_2d_no_rot: out = (x*M0 + M12, y*M5 + M13, z).  One element
 * per loop pass.  Result size is 3.
 */
	.globl	_mesa_sparc_transform_points3_2d_no_rot
_mesa_sparc_transform_points3_2d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_5_12_13(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f3		! FGM
	fmuls	%f1, M5, %f4		! FGM Group
	st	%f2, [%g2 + 0x08]	! LSU
	fadds	%f3, M12, %f3		! FGA Group
	st	%f3, [%g2 + 0x00]	! LSU
	fadds	%f4, M13, %f4		! FGA Group
	st	%f4, [%g2 + 0x04]	! LSU
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_3
	 nop

/*
 * points3_3d:
 *   out[i] = x*M(i) + y*M(4+i) + z*M(8+i) + M(12+i)   for i = 0..2.
 * One element per loop pass.  Result size is 3.
 */
	.globl	_mesa_sparc_transform_points3_3d
_mesa_sparc_transform_points3_3d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f3		! FGM
	fmuls	%f1, M4, %f6		! FGM Group
	fmuls	%f0, M1, %f4		! FGM Group
	fmuls	%f1, M5, %f7		! FGM Group
	fmuls	%f0, M2, %f5		! FGM Group f3 available
	fmuls	%f1, M6, %f8		! FGM Group f6 available
	fadds	%f3, %f6, %f3		! FGA
	fmuls	%f2, M8, %f9		! FGM Group f4 available
	fmuls	%f2, M9, %f10		! FGM Group f7 available
	fadds	%f4, %f7, %f4		! FGA
	fmuls	%f2, M10, %f11		! FGM Group f5 available
	fadds	%f5, %f8, %f5		! FGA Group f8, f3 available
	fadds	%f3, %f9, %f3		! FGA Group f9 available
	fadds	%f4, %f10, %f4		! FGA Group f10, f4 available
	fadds	%f5, %f11, %f5		! FGA Group stall, f11, f5 available
	fadds	%f3, M12, %f3		! FGA Group f3 available
	st	%f3, [%g2 + 0x00]	! LSU
	fadds	%f4, M13, %f4		! FGA Group f4 available
	st	%f4, [%g2 + 0x04]	! LSU
	fadds	%f5, M14, %f5		! FGA Group f5 available
	st	%f5, [%g2 + 0x08]	! LSU
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_3
	 nop

/*
 * points3_3d_no_rot: out = (x*M0 + M12, y*M5 + M13, z*M10 + M14).  One
 * element per loop pass; the loop compare is issued early and its
 * condition codes are consumed by the bne at the bottom.  Result size
 * is 3.
 */
	.globl	_mesa_sparc_transform_points3_3d_no_rot
_mesa_sparc_transform_points3_3d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_5_10_12_13_14(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	cmp	%o1, %g3		! IEU1 Group
	fmuls	%f0, M0, %f3		! FGM
	fmuls	%f1, M5, %f4		! FGM Group
	fmuls	%f2, M10, %f5		! FGM Group
	fadds	%f3, M12, %f3		! FGA Group, stall, f3 available
	st	%f3, [%g2 + 0x00]	! LSU
	fadds	%f4, M13, %f4		! FGA Group, f4 available
	st	%f4, [%g2 + 0x04]	! LSU
	fadds	%f5, M14, %f5		! FGA Group, f5 available
	st	%f5, [%g2 + 0x08]	! LSU
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_3
	 nop

/*
 * points3_perspective:
 *   out = (x*M0 + z*M8, y*M5 + z*M9, z*M10 + M14, -z).
 * One element per loop pass.  Result size is 4.
 */
	.globl	_mesa_sparc_transform_points3_perspective
_mesa_sparc_transform_points3_perspective:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_5_8_9_10_14(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f3		! FGM
	fmuls	%f2, M8, %f6		! FGM Group
	fmuls	%f1, M5, %f4		! FGM Group
	fmuls	%f2, M9, %f7		! FGM Group
	fmuls	%f2, M10, %f5		! FGM Group f3 available
	fadds	%f3, %f6, %f3		! FGA Group f6 available
	st	%f3, [%g2 + 0x00]	! LSU
	fadds	%f4, %f7, %f4		! FGA Group stall, f4, f7 available
	st	%f4, [%g2 + 0x04]	! LSU
	fadds	%f5, M14, %f5		! FGA Group
	st	%f5, [%g2 + 0x08]	! LSU
	fnegs	%f2, %f6		! FGA Group
	st	%f6, [%g2 + 0x0c]	! LSU
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_4
	 nop

/*
 * points4_general:
 *   out[i] = x*M(i) + y*M(4+i) + z*M(8+i) + w*M(12+i)   for i = 0..3.
 * One element per loop pass.  Result size is 4.
 */
	.globl	_mesa_sparc_transform_points4_general
_mesa_sparc_transform_points4_general:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	ld	[%g1 + 0x0c], %f3	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f4		! FGM Group
	fmuls	%f1, M4, %f8		! FGM Group
	fmuls	%f0, M1, %f5		! FGM Group
	fmuls	%f1, M5, %f9		! FGM Group
	fmuls	%f0, M2, %f6		! FGM Group f4 available
	fmuls	%f1, M6, %f10		! FGM Group f8 available
	fadds	%f4, %f8, %f4		! FGA
	fmuls	%f0, M3, %f7		! FGM Group f5 available
	fmuls	%f1, M7, %f11		! FGM Group f9 available
	fadds	%f5, %f9, %f5		! FGA
	fmuls	%f2, M8, %f12		! FGM Group f6 available
	fmuls	%f2, M9, %f13		! FGM Group f10, f4 available
	fadds	%f6, %f10, %f6		! FGA
	fmuls	%f2, M10, %f14		! FGM Group f7 available
	fmuls	%f2, M11, %f15		! FGM Group f11, f5 available
	fadds	%f7, %f11, %f7		! FGA
	fmuls	%f3, M12, %f8		! FGM Group f12 available
	fadds	%f4, %f12, %f4		! FGA
	fmuls	%f3, M13, %f9		! FGM Group f13, f6 available
	fadds	%f5, %f13, %f5		! FGA
	fmuls	%f3, M14, %f10		! FGM Group f14 available
	fadds	%f6, %f14, %f6		! FGA
	fmuls	%f3, M15, %f11		! FGM Group f15, f7 available
	fadds	%f7, %f15, %f7		! FGA
	fadds	%f4, %f8, %f4		! FGA Group f8, f4 available
	st	%f4, [%g2 + 0x00]	! LSU
	fadds	%f5, %f9, %f5		! FGA Group f9, f5 available
	st	%f5, [%g2 + 0x04]	! LSU
	fadds	%f6, %f10, %f6		! FGA Group f10, f6 available
	st	%f6, [%g2 + 0x08]	! LSU
	fadds	%f7, %f11, %f7		! FGA Group f11, f7 available
	st	%f7, [%g2 + 0x0c]	! LSU
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_4
	 nop

/*
 * points4_identity: copy (x, y, z, w) to each output element.  One
 * element per loop pass.  Result size is 4.  There is no early exit
 * for %o0 == %o2.
 */
	.globl	_mesa_sparc_transform_points4_identity
_mesa_sparc_transform_points4_identity:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0
	ld	[%g1 + 0x04], %f1
	ld	[%g1 + 0x08], %f2
	add	%o1, 1, %o1
	ld	[%g1 + 0x0c], %f3
	add	%g1, %o5, %g1
	st	%f0, [%g2 + 0x00]
	st	%f1, [%g2 + 0x04]
	st	%f2, [%g2 + 0x08]
	cmp	%o1, %g3
	st	%f3, [%g2 + 0x0c]
	bne	1b
	 add	%g2, 0x10, %g2
2:
	ba	__set_v4f_4
	 nop

/*
 * points4_2d:
 *   out = (x*M0 + y*M4 + w*M12, x*M1 + y*M5 + w*M13, z, w).
 * One element per loop pass.  Result size is 4.
 */
	.globl	_mesa_sparc_transform_points4_2d
_mesa_sparc_transform_points4_2d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_4_5_12_13(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	ld	[%g1 + 0x0c], %f3	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f4		! FGM
	fmuls	%f1, M4, %f8		! FGM Group
	fmuls	%f0, M1, %f5		! FGM Group
	fmuls	%f1, M5, %f9		! FGM Group f4 available
	fmuls	%f3, M12, %f12		! FGM Group
	fmuls	%f3, M13, %f13		! FGM Group f8 available
	fadds	%f4, %f8, %f4		! FGA
	fadds	%f5, %f9, %f5		! FGA Group stall, f5, f9 available
	fadds	%f4, %f12, %f4		! FGA Group 2 cycle stall, f4, f12, f13 avail
	st	%f4, [%g2 + 0x00]	! LSU
	fadds	%f5, %f13, %f5		! FGA Group f5 available
	st	%f5, [%g2 + 0x04]	! LSU
	st	%f2, [%g2 + 0x08]	! LSU Group
	st	%f3, [%g2 + 0x0c]	! LSU Group
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_4
	 nop

/*
 * points4_2d_no_rot: out = (x*M0 + w*M12, y*M5 + w*M13, z, w).  One
 * element per loop pass.  Result size is 4.
 *
 * NOTE(review): the macro below also loads M1 and M4, which this loop
 * never reads; LDMATRIX_0_5_12_13 (used by the other *_2d_no_rot
 * routines) looks sufficient - confirm against sparc_matrix.h.
 */
	.globl	_mesa_sparc_transform_points4_2d_no_rot
_mesa_sparc_transform_points4_2d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_4_5_12_13(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0
	ld	[%g1 + 0x04], %f1
	ld	[%g1 + 0x08], %f2
	ld	[%g1 + 0x0c], %f3
	add	%o1, 1, %o1
	add	%g1, %o5, %g1
	fmuls	%f0, M0, %f4
	fmuls	%f3, M12, %f8
	fmuls	%f1, M5, %f5
	fmuls	%f3, M13, %f9
	fadds	%f4, %f8, %f4
	st	%f4, [%g2 + 0x00]
	fadds	%f5, %f9, %f5
	st	%f5, [%g2 + 0x04]
	st	%f2, [%g2 + 0x08]
	st	%f3, [%g2 + 0x0c]
	cmp	%o1, %g3
	bne	1b
	 add	%g2, 0x10, %g2
2:
	ba	__set_v4f_4
	 nop

/*
 * points4_3d:
 *   out[i] = x*M(i) + y*M(4+i) + z*M(8+i) + w*M(12+i)   for i = 0..2,
 *   out[3] = w.
 * One element per loop pass.  Result size is 4.
 */
	.globl	_mesa_sparc_transform_points4_3d
_mesa_sparc_transform_points4_3d:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	ld	[%g1 + 0x0c], %f3	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f4		! FGM
	fmuls	%f1, M4, %f7		! FGM Group
	fmuls	%f0, M1, %f5		! FGM Group
	fmuls	%f1, M5, %f8		! FGM Group
	fmuls	%f0, M2, %f6		! FGM Group f4 available
	fmuls	%f1, M6, %f9		! FGM Group f7 available
	fadds	%f4, %f7, %f4		! FGA
	fmuls	%f2, M8, %f10		! FGM Group f5 available
	fmuls	%f2, M9, %f11		! FGM Group f8 available
	fadds	%f5, %f8, %f5		! FGA
	fmuls	%f2, M10, %f12		! FGM Group f6 available
	fmuls	%f3, M12, %f13		! FGM Group f9, f4 available
	fadds	%f6, %f9, %f6		! FGA
	fmuls	%f3, M13, %f14		! FGM Group f10 available
	fadds	%f4, %f10, %f4		! FGA
	fmuls	%f3, M14, %f15		! FGM Group f11, f5 available
	fadds	%f5, %f11, %f5		! FGA
	fadds	%f6, %f12, %f6		! FGA Group stall, f12, f13, f6 available
	fadds	%f4, %f13, %f4		! FGA Group f14, f4 available
	st	%f4, [%g2 + 0x00]	! LSU
	fadds	%f5, %f14, %f5		! FGA Group f15, f5 available
	st	%f5, [%g2 + 0x04]	! LSU
	fadds	%f6, %f15, %f6		! FGA Group f6 available
	st	%f6, [%g2 + 0x08]	! LSU
	st	%f3, [%g2 + 0x0c]	! LSU Group
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		! IEU0 Group
2:
	ba	__set_v4f_4
	 nop

/*
 * points4_3d_no_rot: each loop pass stores
 *   (x*M0 + w*M12, y*M5 + w*M13, z*M10 + w*M14, w).
 */
	.globl	_mesa_sparc_transform_points4_3d_no_rot
_mesa_sparc_transform_points4_3d_no_rot:
	ld	[%o2 + V4F_STRIDE], %o5
	LDPTR	[%o2 + V4F_START], %g1
	LDPTR	[%o0 + V4F_START], %g2
	ld	[%o2 + V4F_COUNT], %g3

	LDMATRIX_0_5_10_12_13_14(%o1)

	cmp	%g3, 0
	st	%g3, [%o0 + V4F_COUNT]
	be	2f
	 clr	%o1

1:	ld	[%g1 + 0x00], %f0	! LSU Group
	ld	[%g1 + 0x04], %f1	! LSU Group
	ld	[%g1 + 0x08], %f2	! LSU Group
	ld	[%g1 + 0x0c], %f3	! LSU Group
	add	%o1, 1, %o1		! IEU0
	add	%g1, %o5, %g1		! IEU1
	fmuls	%f0, M0, %f4		! FGM
	fmuls	%f3, M12, %f7		! FGM Group
	fmuls	%f1, M5, %f5		! FGM Group
	fmuls	%f3, M13, %f8		! FGM Group
	fmuls	%f2, M10, %f6		! FGM Group f4 available
	fmuls	%f3, M14, %f9		! FGM Group f7 available
	fadds	%f4, %f7, %f4		! FGA
	st	%f4, [%g2 + 0x00]	! LSU
	fadds	%f5, %f8, %f5		! FGA Group stall, f5, f8 available
	st	%f5, [%g2 + 0x04]	! LSU
	fadds	%f6, %f9, %f6		! FGA Group stall, f6, f9 available
	st	%f6, [%g2 + 0x08]	! LSU
	st	%f3, [%g2 + 0x0c]	! LSU Group
	cmp	%o1, %g3		! IEU1
	bne	1b			! CTI
	 add	%g2, 0x10, %g2		!
IEU0 Group 13502: 1351 ba __set_v4f_4 1352 nop 1353 1354 .globl _mesa_sparc_transform_points4_perspective 1355_mesa_sparc_transform_points4_perspective: 1356 ld [%o2 + V4F_STRIDE], %o5 1357 LDPTR [%o2 + V4F_START], %g1 1358 LDPTR [%o0 + V4F_START], %g2 1359 ld [%o2 + V4F_COUNT], %g3 1360 1361 LDMATRIX_0_5_8_9_10_14(%o1) 1362 1363 cmp %g3, 0 1364 st %g3, [%o0 + V4F_COUNT] 1365 be 2f 1366 clr %o1 1367 13681: ld [%g1 + 0x00], %f0 ! LSU Group 1369 ld [%g1 + 0x04], %f1 ! LSU Group 1370 ld [%g1 + 0x08], %f2 ! LSU Group 1371 ld [%g1 + 0x0c], %f3 ! LSU Group 1372 add %o1, 1, %o1 ! IEU0 1373 add %g1, %o5, %g1 ! IEU1 1374 fmuls %f0, M0, %f4 ! FGM 1375 fmuls %f2, M8, %f7 ! FGM Group 1376 fmuls %f1, M5, %f5 ! FGM Group 1377 fmuls %f2, M9, %f8 ! FGM Group 1378 fmuls %f2, M10, %f6 ! FGM Group f4 available 1379 fmuls %f3, M14, %f9 ! FGM Group f7 available 1380 fadds %f4, %f7, %f4 ! FGA 1381 st %f4, [%g2 + 0x00] ! LSU 1382 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available 1383 st %f5, [%g2 + 0x04] ! LSU 1384 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available 1385 st %f6, [%g2 + 0x08] ! LSU 1386 fnegs %f2, %f7 ! FGA Group 1387 st %f7, [%g2 + 0x0c] ! LSU 1388 cmp %o1, %g3 ! IEU1 1389 bne 1b ! CTI 1390 add %g2, 0x10, %g2 ! IEU0 Group 13912: 1392 ba __set_v4f_4 1393 nop 1394