/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
 * in there will break the build on some platforms.
 */

#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/* Raw 32-bit patterns stored with integer moves: 1065353216 is 0x3F800000,
 * the IEEE-754 single-precision encoding of 1.0; 0 encodes +0.0.
 */
#define FP_ONE	1065353216
#define FP_ZERO	0

/* Per-element operands.  Inside every loop ESI is the source cursor,
 * EDI the destination cursor and EDX the matrix base; each slot is one
 * 4-byte float, MATn living at byte offset 4*n.
 */
#define SRC0	REGOFF(0, ESI)
#define SRC1	REGOFF(4, ESI)
#define SRC2	REGOFF(8, ESI)
#define SRC3	REGOFF(12, ESI)
#define DST0	REGOFF(0, EDI)
#define DST1	REGOFF(4, EDI)
#define DST2	REGOFF(8, EDI)
#define DST3	REGOFF(12, EDI)
#define MAT0	REGOFF(0, EDX)
#define MAT1	REGOFF(4, EDX)
#define MAT2	REGOFF(8, EDX)
#define MAT3	REGOFF(12, EDX)
#define MAT4	REGOFF(16, EDX)
#define MAT5	REGOFF(20, EDX)
#define MAT6	REGOFF(24, EDX)
#define MAT7	REGOFF(28, EDX)
#define MAT8	REGOFF(32, EDX)
#define MAT9	REGOFF(36, EDX)
#define MAT10	REGOFF(40, EDX)
#define MAT11	REGOFF(44, EDX)
#define MAT12	REGOFF(48, EDX)
#define MAT13	REGOFF(52, EDX)
#define MAT14	REGOFF(56, EDX)
#define MAT15	REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points2_general
 *
 * Reads two floats per source element (x = SRC0, y = SRC1) and stores
 * four floats per destination element:
 *
 *	DST0 = x*MAT0 + y*MAT4 + MAT12
 *	DST1 = x*MAT1 + y*MAT5 + MAT13
 *	DST2 = x*MAT2 + y*MAT6 + MAT14
 *	DST3 = x*MAT3 + y*MAT7 + MAT15
 *
 * Loop registers: EAX = source stride added to ESI each pass (bytes),
 * ECX = destination start + 16*count (loop end), EDX = matrix.
 * The destination header receives the source count, size 4 and the
 * VEC_SIZE_4 flag.  A zero source count returns before any of that is
 * written.  ARG_SOURCE/ARG_DEST/ARG_MATRIX come from xform_args.h and
 * presumably address the stack relative to FRAME_OFFSET (confirm there).
 *
 * x87 stack comments list ST(0) first.  rN is the running value of
 * output component N (it starts as x*MATN); yK is the product y*MATK.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
HIDDEN(_mesa_x86_transform_points2_general)
GLNAME( _mesa_x86_transform_points2_general ):

#define FRAME_OFFSET 8
	/* two 4-byte pushes, matching FRAME_OFFSET above */
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* nothing to do for an empty source vector */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end pointer two steps below */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_gr_loop):

	/* the four x products */
	FLD_S( SRC0 )			/* r0 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* r1 r0 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* r2 r1 r0 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* r3 r2 r1 r0 */
	FMUL_S( MAT3 )

	/* the four y products */
	FLD_S( SRC1 )			/* y4 r3 r2 r1 r0 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* y5 y4 r3 r2 r1 r0 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* y6 y5 y4 r3 r2 r1 r0 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* y7 y6 y5 y4 r3 r2 r1 r0 */
	FMUL_S( MAT7 )

	/* fold every y product into its matching accumulator */
	FXCH( ST(3) )			/* y4 y6 y5 y7 r3 r2 r1 r0 */
	FADDP( ST0, ST(7) )		/* y6 y5 y7 r3 r2 r1 r0 */
	FXCH( ST(1) )			/* y5 y6 y7 r3 r2 r1 r0 */
	FADDP( ST0, ST(5) )		/* y6 y7 r3 r2 r1 r0 */
	FADDP( ST0, ST(3) )		/* y7 r3 r2 r1 r0 */
	FADDP( ST0, ST(1) )		/* r3 r2 r1 r0 */

	/* add the MAT12..MAT15 terms */
	FXCH( ST(3) )			/* r0 r2 r1 r3 */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* r1 r2 r0 r3 */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* r2 r1 r0 r3 */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* r3 r1 r0 r2 */
	FADD_S( MAT15 )

	/* store in component order, leaving the x87 stack empty */
	FXCH( ST(2) )			/* r0 r1 r3 r2 */
	FSTP_S( DST0 )			/* r1 r3 r2 */
	FSTP_S( DST1 )			/* r3 r2 */
	FXCH( ST(1) )			/* r2 r3 */
	FSTP_S( DST2 )			/* r3 */
	FSTP_S( DST3 )			/* */

LLBL(x86_p2_gr_skip):

	/* destination elements are 16 bytes apart, source ones EAX apart */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_gr_loop) )

LLBL(x86_p2_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_perspective
 *
 * Per element (x = SRC0, y = SRC1):
 *
 *	DST0 = x*MAT0
 *	DST1 = y*MAT5
 *	DST2 = MAT14	(32-bit copy, fetched once into EBX before the loop)
 *	DST3 = FP_ZERO	(stored as an integer immediate)
 *
 * Same register roles and header handling as the other routines here:
 * EAX = source stride, ECX = destination end, size 4 / VEC_SIZE_4; a
 * zero source count returns without writing the destination.  EBX is
 * saved and restored because it carries the MAT14 bits.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
HIDDEN(_mesa_x86_transform_points2_perspective)
GLNAME( _mesa_x86_transform_points2_perspective ):

#define FRAME_OFFSET 12
	/* three 4-byte pushes, matching FRAME_OFFSET above */
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* nothing to do for an empty source vector */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end pointer two steps below */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* loop-invariant third component */
	MOV_L( MAT14, EBX )

ALIGNTEXT16
LLBL(x86_p2_pr_loop):

	FLD_S( SRC0 )			/* r0 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* r1 r0 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* r0 r1 */
	FSTP_S( DST0 )			/* r1 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( CONST(FP_ZERO), DST3 )

LLBL(x86_p2_pr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_pr_loop) )

LLBL(x86_p2_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d
 *
 * Per element (x = SRC0, y = SRC1) three floats are stored:
 *
 *	DST0 = x*MAT0 + y*MAT4 + MAT12
 *	DST1 = x*MAT1 + y*MAT5 + MAT13
 *	DST2 = x*MAT2 + y*MAT6 + MAT14
 *
 * DST3 is not written.  The destination header receives the source
 * count, size 3 and the VEC_SIZE_3 flag; a zero source count returns
 * before that.  EAX = source stride, ECX = destination end pointer.
 *
 * x87 stack comments list ST(0) first.  rN is the running value of
 * output component N (it starts as x*MATN); yK is the product y*MATK.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
HIDDEN(_mesa_x86_transform_points2_3d)
GLNAME( _mesa_x86_transform_points2_3d ):

#define FRAME_OFFSET 8
	/* two 4-byte pushes, matching FRAME_OFFSET above */
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* nothing to do for an empty source vector */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end pointer two steps below */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_3dr_loop):

	/* the three x products */
	FLD_S( SRC0 )			/* r0 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* r1 r0 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* r2 r1 r0 */
	FMUL_S( MAT2 )

	/* the three y products */
	FLD_S( SRC1 )			/* y4 r2 r1 r0 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* y5 y4 r2 r1 r0 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* y6 y5 y4 r2 r1 r0 */
	FMUL_S( MAT6 )

	/* fold every y product into its matching accumulator */
	FXCH( ST(2) )			/* y4 y5 y6 r2 r1 r0 */
	FADDP( ST0, ST(5) )		/* y5 y6 r2 r1 r0 */
	FADDP( ST0, ST(3) )		/* y6 r2 r1 r0 */
	FADDP( ST0, ST(1) )		/* r2 r1 r0 */

	/* add the MAT12..MAT14 terms */
	FXCH( ST(2) )			/* r0 r1 r2 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* r1 r0 r2 */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* r2 r0 r1 */
	FADD_S( MAT14 )

	/* store in component order, leaving the x87 stack empty */
	FXCH( ST(1) )			/* r0 r2 r1 */
	FSTP_S( DST0 )			/* r2 r1 */
	FXCH( ST(1) )			/* r1 r2 */
	FSTP_S( DST1 )			/* r2 */
	FSTP_S( DST2 )			/* */

LLBL(x86_p2_3dr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dr_loop) )

LLBL(x86_p2_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Per element (x = SRC0, y = SRC1):
 *
 *	DST0 = x*MAT0 + MAT12
 *	DST1 = MAT13 + y*MAT5
 *	DST2 = MAT14	(32-bit copy, fetched once into EBX before the loop)
 *
 * DST3 is not written.  The destination header receives the source
 * count, size 3 and the VEC_SIZE_3 flag; a zero source count returns
 * before that.  EAX = source stride, ECX = destination end pointer.
 *
 * x87 stack comments list ST(0) first: r0/r1 are the running values of
 * output components 0 and 1, y5 is the product y*MAT5.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

#define FRAME_OFFSET 12
	/* three 4-byte pushes, matching FRAME_OFFSET above */
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* nothing to do for an empty source vector */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end pointer two steps below */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* loop-invariant third component */
	MOV_L( MAT14, EBX )

ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):

	FLD_S( SRC0 )			/* r0 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* y5 r0 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* r0 y5 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )			/* r1 r0 y5 */
	FXCH( ST(2) )			/* y5 r0 r1 */
	FADDP( ST0, ST(2) )		/* r0 r1 */

	FSTP_S( DST0 )			/* r1 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )

LLBL(x86_p2_3dnrr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dnrr_loop) )

LLBL(x86_p2_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d
 *
 * Per element, with x = REGOFF(0, ESI) and y = REGOFF(4, ESI), two
 * floats are stored:
 *
 *	DST0 = x*MAT0 + y*MAT4 + MAT12
 *	DST1 = x*MAT1 + y*MAT5 + MAT13
 *
 * DST2 and DST3 are not written.  The destination header receives the
 * source count, size 2 and the VEC_SIZE_2 flag; a zero source count
 * returns before any of that is written.
 *
 * Loop registers: ESI = source cursor, EDI = destination cursor,
 * EDX = matrix, EAX = source stride added to ESI each pass (bytes),
 * ECX = destination start + 16*count (loop end).
 *
 * x87 stack comments list ST(0) first.  rN is the running value of
 * output component N (it starts as x*MATN); yK is the product y*MATK.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
HIDDEN(_mesa_x86_transform_points2_2d)
GLNAME( _mesa_x86_transform_points2_2d ):

#define FRAME_OFFSET 8
	/* two 4-byte pushes, matching FRAME_OFFSET above */
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* nothing to do for an empty source vector */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end pointer two steps below */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_2dr_loop):

	/* the two x products */
	FLD_S( SRC0 )			/* r0 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* r1 r0 */
	FMUL_S( MAT1 )

	/* the two y products */
	FLD_S( SRC1 )			/* y4 r1 r0 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* y5 y4 r1 r0 */
	FMUL_S( MAT5 )

	/* fold every y product into its matching accumulator */
	FXCH( ST(1) )			/* y4 y5 r1 r0 */
	FADDP( ST0, ST(3) )		/* y5 r1 r0 */
	FADDP( ST0, ST(1) )		/* r1 r0 */

	/* add the MAT12/MAT13 terms */
	FXCH( ST(1) )			/* r0 r1 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* r1 r0 */
	FADD_S( MAT13 )

	/* store in component order, leaving the x87 stack empty */
	FXCH( ST(1) )			/* r0 r1 */
	FSTP_S( DST0 )			/* r1 */
	FSTP_S( DST1 )			/* */

LLBL(x86_p2_2dr_skip):

	/* destination elements are 16 bytes apart, source ones EAX apart */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dr_loop) )

LLBL(x86_p2_2dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Per element, with x = REGOFF(0, ESI) and y = REGOFF(4, ESI):
 *
 *	DST0 = x*MAT0 + MAT12
 *	DST1 = MAT13 + y*MAT5
 *
 * DST2 and DST3 are not written.  The destination header receives the
 * source count, size 2 and the VEC_SIZE_2 flag; a zero source count
 * returns before any of that is written.  EAX = source stride in bytes,
 * ECX = destination end pointer, EDX = matrix.
 *
 * The entry point uses ALIGNTEXT16 like every other routine in this
 * file.
 *
 * x87 stack comments list ST(0) first: r0/r1 are the running values of
 * output components 0 and 1, y5 is the product y*MAT5.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

#define FRAME_OFFSET 8
	/* two 4-byte pushes, matching FRAME_OFFSET above */
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* nothing to do for an empty source vector */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end pointer two steps below */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):

	FLD_S( SRC0 )			/* r0 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* y5 r0 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* r0 y5 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )			/* r1 r0 y5 */
	FXCH( ST(2) )			/* y5 r0 r1 */
	FADDP( ST0, ST(2) )		/* r0 r1 */

	FSTP_S( DST0 )			/* r1 */
	FSTP_S( DST1 )			/* */

LLBL(x86_p2_2dnrr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dnrr_loop) )

LLBL(x86_p2_2dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies the first two 32-bit words of every source element to the
 * destination unchanged, using integer moves through EBX and EDX:
 *
 *	DST0 = SRC0
 *	DST1 = SRC1
 *
 * The matrix argument is never read: no matrix term enters the result
 * and EDX serves only as copy scratch.  The destination header receives
 * the source count, size 2 and the VEC_SIZE_2 flag; a zero source count
 * returns before any of that is written.  When the source and
 * destination start pointers are equal the header is still updated but
 * the copy loop is skipped.
 *
 * EAX = source stride in bytes, ECX = destination end pointer.  EBX is
 * saved and restored around its use as scratch.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
HIDDEN(_mesa_x86_transform_points2_identity)
GLNAME( _mesa_x86_transform_points2_identity ):

#define FRAME_OFFSET 12
	/* three 4-byte pushes, matching FRAME_OFFSET above */
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* nothing to do for an empty source vector */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end pointer two steps below */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* source and destination data start at the same address: no copy */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p2_ir_done) )

ALIGNTEXT16
LLBL(x86_p2_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

LLBL(x86_p2_ir_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_ir_loop) )

LLBL(x86_p2_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif