/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
 * in there will break the build on some platforms.
 */

#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

    SEG_TEXT

/*
 * Bit patterns of the IEEE-754 single precision values 1.0f (0x3F800000)
 * and 0.0f.  Neither is referenced by the routines in this file.
 */
#define FP_ONE  1065353216
#define FP_ZERO 0

/* Components 0..3 of the current source element (ESI). */
#define SRC0    REGOFF(0, ESI)
#define SRC1    REGOFF(4, ESI)
#define SRC2    REGOFF(8, ESI)
#define SRC3    REGOFF(12, ESI)

/* Components 0..3 of the current destination element (EDI). */
#define DST0    REGOFF(0, EDI)
#define DST1    REGOFF(4, EDI)
#define DST2    REGOFF(8, EDI)
#define DST3    REGOFF(12, EDI)

/* The sixteen consecutive 4-byte matrix entries m[0]..m[15] (EDX). */
#define MAT0    REGOFF(0, EDX)
#define MAT1    REGOFF(4, EDX)
#define MAT2    REGOFF(8, EDX)
#define MAT3    REGOFF(12, EDX)
#define MAT4    REGOFF(16, EDX)
#define MAT5    REGOFF(20, EDX)
#define MAT6    REGOFF(24, EDX)
#define MAT7    REGOFF(28, EDX)
#define MAT8    REGOFF(32, EDX)
#define MAT9    REGOFF(36, EDX)
#define MAT10   REGOFF(40, EDX)
#define MAT11   REGOFF(44, EDX)
#define MAT12   REGOFF(48, EDX)
#define MAT13   REGOFF(52, EDX)
#define MAT14   REGOFF(56, EDX)
#define MAT15   REGOFF(60, EDX)

/*
 * Conventions shared by every routine in this file
 *
 * Arguments are fetched with ARG_SOURCE, ARG_DEST and ARG_MATRIX.  Each
 * routine defines FRAME_OFFSET as 4 bytes per register it pushes before
 * using them (presumably the ARG_* macros in xform_args.h add it to their
 * stack offsets -- confirm there).  Every pushed register is popped again
 * before RET; EAX, ECX and EDX are overwritten.
 *
 * Register roles inside the loops:
 *     ESI  current source element; advanced by the source stride
 *     EDI  current destination element; advanced by 16 bytes
 *     EDX  matrix base (see MAT0..MAT15)
 *     EAX  source stride in bytes, read from V4F_STRIDE
 *     ECX  destination end pointer: destination start + count * 16
 *
 * Vector header handling: the count is read from the source vector.  If
 * it is zero the routine returns at once and leaves the destination
 * untouched.  Otherwise the destination receives the same count, its size
 * field is set to the number of components the loop writes, and the
 * matching VEC_SIZE_* value is OR-ed into its flags.
 *
 * The trailing comments on floating point instructions list the x87
 * register stack after that instruction, top of stack (ST0) first.  The
 * x87 stack is balanced across each loop iteration.
 */


/*
 * _mesa_x86_transform_points3_general
 *
 * Full 4x4 transform of 3-component input, 4-component output:
 *     dst[i] = src[0]*m[i] + src[1]*m[4+i] + src[2]*m[8+i] + m[12+i]
 * for i = 0..3.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_general )
HIDDEN(_mesa_x86_transform_points3_general)
GLNAME( _mesa_x86_transform_points3_general ):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p3_gr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per output element */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = end of destination data */

ALIGNTEXT16
LLBL(x86_p3_gr_loop):

    /* F4..F7 = src[0] * m[0..3] */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )
    FLD_S( SRC0 )                       /* F7 F6 F5 F4 */
    FMUL_S( MAT3 )

    /* F0..F3 = src[1] * m[4..7] */
    FLD_S( SRC1 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT6 )
    FLD_S( SRC1 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT7 )

    /* F4 += F0, F5 += F1, F6 += F2, F7 += F3 */
    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* F0..F3 = src[2] * m[8..11] */
    FLD_S( SRC2 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT10 )
    FLD_S( SRC2 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT11 )

    /* F4 += F0, F5 += F1, F6 += F2, F7 += F3 */
    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* add the translation column m[12..15] */
    FXCH( ST(3) )                       /* F4 F6 F5 F7 */
    FADD_S( MAT12 )
    FXCH( ST(2) )                       /* F5 F6 F4 F7 */
    FADD_S( MAT13 )
    FXCH( ST(1) )                       /* F6 F5 F4 F7 */
    FADD_S( MAT14 )
    FXCH( ST(3) )                       /* F7 F5 F4 F6 */
    FADD_S( MAT15 )

    /* store F4..F7 to dst[0..3] */
    FXCH( ST(2) )                       /* F4 F5 F7 F6 */
    FSTP_S( DST0 )                      /* F5 F7 F6 */
    FSTP_S( DST1 )                      /* F7 F6 */
    FXCH( ST(1) )                       /* F6 F7 */
    FSTP_S( DST2 )                      /* F7 */
    FSTP_S( DST3 )                      /* */

LLBL(x86_p3_gr_skip):

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p3_gr_loop) )

LLBL(x86_p3_gr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_perspective
 *
 * Transform that reads only m[0], m[5], m[8], m[9], m[10] and m[14];
 * 4-component output:
 *     dst[0] = src[0]*m[0] + src[2]*m[8]
 *     dst[1] = src[1]*m[5] + src[2]*m[9]
 *     dst[2] = src[2]*m[10] + m[14]
 *     dst[3] = -src[2]
 * The negation is done on the raw 32-bit word by toggling bit 31 in EBX.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
HIDDEN(_mesa_x86_transform_points3_perspective)
GLNAME( _mesa_x86_transform_points3_perspective ):

#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p3_pr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per output element */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = end of destination data */

ALIGNTEXT16
LLBL(x86_p3_pr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F5 F4 */
    FMUL_S( MAT5 )

    FLD_S( SRC2 )                       /* F0 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F2 F1 F0 F5 F4 */
    FMUL_S( MAT10 )

    FXCH( ST(2) )                       /* F0 F1 F2 F5 F4 */
    FADDP( ST0, ST(4) )                 /* F1 F2 F5 F4 */
    FADDP( ST0, ST(2) )                 /* F2 F5 F4 */
    FLD_S( MAT14 )                      /* F6 F2 F5 F4 */
    FXCH( ST(1) )                       /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    MOV_L( SRC2, EBX )
    XOR_L( CONST(-2147483648), EBX )    /* change sign */

    FXCH( ST(2) )                       /* F4 F5 F6 */
    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* */
    MOV_L( EBX, DST3 )

LLBL(x86_p3_pr_skip):

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p3_pr_loop) )

LLBL(x86_p3_pr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_3d
 *
 * Transform using the first three entries of each matrix column;
 * 3-component output (dst[3] is not written):
 *     dst[i] = src[0]*m[i] + src[1]*m[4+i] + src[2]*m[8+i] + m[12+i]
 * for i = 0..2.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d )
HIDDEN(_mesa_x86_transform_points3_3d)
GLNAME( _mesa_x86_transform_points3_3d ):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p3_3dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per output element */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = end of destination data */

ALIGNTEXT16
LLBL(x86_p3_3dr_loop):

    /* F4..F6 = src[0] * m[0..2] */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )

    /* F0..F2 = src[1] * m[4..6] */
    FLD_S( SRC1 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT6 )

    /* F4 += F0, F5 += F1, F6 += F2 */
    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* F0..F2 = src[2] * m[8..10] */
    FLD_S( SRC2 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT10 )

    /* F4 += F0, F5 += F1, F6 += F2 */
    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* add the translation entries m[12..14] */
    FXCH( ST(2) )                       /* F4 F5 F6 */
    FADD_S( MAT12 )
    FXCH( ST(1) )                       /* F5 F4 F6 */
    FADD_S( MAT13 )
    FXCH( ST(2) )                       /* F6 F4 F5 */
    FADD_S( MAT14 )

    /* store F4..F6 to dst[0..2] */
    FXCH( ST(1) )                       /* F4 F6 F5 */
    FSTP_S( DST0 )                      /* F6 F5 */
    FXCH( ST(1) )                       /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* */

LLBL(x86_p3_3dr_skip):

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p3_3dr_loop) )

LLBL(x86_p3_3dr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * Per-axis scale plus translation; 3-component output (dst[3] is not
 * written):
 *     dst[0] = src[0]*m[0]  + m[12]
 *     dst[1] = src[1]*m[5]  + m[13]
 *     dst[2] = src[2]*m[10] + m[14]
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
GLNAME( _mesa_x86_transform_points3_3d_no_rot ):

#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )


    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p3_3dnrr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per output element */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = end of destination data */

ALIGNTEXT16
LLBL(x86_p3_3dnrr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F1 F4 */
    FMUL_S( MAT5 )

    FLD_S( SRC2 )                       /* F2 F1 F4 */
    FMUL_S( MAT10 )

    FXCH( ST(2) )                       /* F4 F1 F2 */
    FADD_S( MAT12 )
    FLD_S( MAT13 )                      /* F5 F4 F1 F2 */
    FXCH( ST(2) )                       /* F1 F4 F5 F2 */
    FADDP( ST0, ST(2) )                 /* F4 F5 F2 */
    FLD_S( MAT14 )                      /* F6 F4 F5 F2 */
    FXCH( ST(3) )                       /* F2 F4 F5 F6 */
    FADDP( ST0, ST(3) )                 /* F4 F5 F6 */

    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* */

LLBL(x86_p3_3dnrr_skip):

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p3_3dnrr_loop) )

LLBL(x86_p3_3dnrr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_2d
 *
 * Transform of the first two components; the third is copied through EBX
 * as a raw 32-bit word.  3-component output (dst[3] is not written):
 *     dst[0] = src[0]*m[0] + src[1]*m[4] + m[12]
 *     dst[1] = src[0]*m[1] + src[1]*m[5] + m[13]
 *     dst[2] = src[2]
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d )
HIDDEN(_mesa_x86_transform_points3_2d)
GLNAME( _mesa_x86_transform_points3_2d ):

#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p3_2dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per output element */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = end of destination data */

ALIGNTEXT16
LLBL(x86_p3_2dr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )

    FLD_S( SRC1 )                       /* F0 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT5 )

    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    FXCH( ST(1) )                       /* F4 F5 */
    FADD_S( MAT12 )
    FXCH( ST(1) )                       /* F5 F4 */
    FADD_S( MAT13 )

    MOV_L( SRC2, EBX )                  /* third component is passed through */

    FXCH( ST(1) )                       /* F4 F5 */
    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* */
    MOV_L( EBX, DST2 )

LLBL(x86_p3_2dr_skip):

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p3_2dr_loop) )

LLBL(x86_p3_2dr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * Scale plus translation of the first two components; the third is copied
 * through EBX as a raw 32-bit word.  3-component output (dst[3] is not
 * written):
 *     dst[0] = src[0]*m[0] + m[12]
 *     dst[1] = src[1]*m[5] + m[13]
 *     dst[2] = src[2]
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
GLNAME( _mesa_x86_transform_points3_2d_no_rot ):

#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p3_2dnrr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per output element */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = end of destination data */

ALIGNTEXT16
LLBL(x86_p3_2dnrr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F1 F4 */
    FMUL_S( MAT5 )

    FXCH( ST(1) )                       /* F4 F1 */
    FADD_S( MAT12 )
    FLD_S( MAT13 )                      /* F5 F4 F1 */

    FXCH( ST(2) )                       /* F1 F4 F5 */
    FADDP( ST0, ST(2) )                 /* F4 F5 */

    MOV_L( SRC2, EBX )                  /* third component is passed through */

    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* */
    MOV_L( EBX, DST2 )

LLBL(x86_p3_2dnrr_skip):

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p3_2dnrr_loop) )

LLBL(x86_p3_2dnrr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies the three components of every source element to the destination
 * as raw 32-bit words; dst[3] is not written and the matrix is never read.
 * The destination header is updated first; if the source and destination
 * data pointers are equal the copy loop is then skipped entirely.
 *
 * EDX is loaded with the matrix argument like in the other routines but is
 * reused as a scratch register inside the loop, together with EBX and EBP.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_identity )
HIDDEN(_mesa_x86_transform_points3_identity)
GLNAME(_mesa_x86_transform_points3_identity ):

#define FRAME_OFFSET 16
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )
    PUSH_L( EBP )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    TEST_L( ECX, ECX )                  /* nothing to do for an empty source */
    JZ( LLBL(x86_p3_ir_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

    SHL_L( CONST(4), ECX )              /* count * 16 bytes per output element */
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = end of destination data */

    CMP_L( ESI, EDI )                   /* same data pointer: nothing to copy */
    JE( LLBL(x86_p3_ir_done) )

ALIGNTEXT16
LLBL(x86_p3_ir_loop):

#if 1
    /* integer copy: three loads followed by three stores */
    MOV_L( SRC0, EBX )
    MOV_L( SRC1, EBP )
    MOV_L( SRC2, EDX )

    MOV_L( EBX, DST0 )
    MOV_L( EBP, DST1 )
    MOV_L( EDX, DST2 )
#else
    /* disabled alternative: the same copy through the x87 stack */
    FLD_S( SRC0 )
    FLD_S( SRC1 )
    FLD_S( SRC2 )

    FSTP_S( DST2 )
    FSTP_S( DST1 )
    FSTP_S( DST0 )
#endif

LLBL(x86_p3_ir_skip):

    ADD_L( CONST(16), EDI )
    ADD_L( EAX, ESI )
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p3_ir_loop) )

LLBL(x86_p3_ir_done):

    POP_L( EBP )
    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET

#if defined (__ELF__) && defined (__linux__)
    .section .note.GNU-stack,"",%progbits
#endif