/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
 * in there will break the build on some platforms.
 */

/*
 * x87 FPU kernels that transform an array of 4-component float points by a
 * 4x4 float matrix.  One entry point exists per matrix class; each one only
 * touches the matrix elements its class needs.
 *
 * The file is run through the C preprocessor.  Instruction and operand
 * macros (MOV_L, FLD_S, REGOFF, CONST, LLBL, GLNAME, ...) come from
 * assyntax.h.  The V4F_* / VEC_SIZE_4 names come from matypes.h and the
 * ARG_* operands from xform_args.h (neither header is visible here;
 * presumably ARG_* are stack operands whose offsets are adjusted by
 * FRAME_OFFSET -- confirm against xform_args.h).
 *
 * Common structure of every entry point:
 *
 *   setup    ESI = source vector descriptor, EDI = dest vector descriptor,
 *            EDX = matrix, ECX = source count.  If the count is zero the
 *            function returns without writing anything.  Otherwise the
 *            dest descriptor gets VEC_SIZE_4 OR-ed into its flags, its
 *            count set to the source count and its size set to 4.
 *
 *   loop     ESI = current source point (advanced by EAX = source stride,
 *                  in bytes, per point)
 *            EDI = current dest point   (advanced by 16 bytes per point)
 *            ECX = dest start + 16 * count, i.e. the address at which the
 *                  loop stops
 *            EDX = matrix base
 *
 *   All results of one point are computed (or loaded into integer
 *   registers) before the first store to that point.
 *
 * Every register that is modified other than EAX, ECX and EDX is pushed on
 * entry and popped on exit.  The x87 stack is left at the depth it had on
 * entry.  The stack comments (F0 .. F7) name the values held in st(0),
 * st(1), ... from left to right.
 */

#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/* Bit patterns of the IEEE-754 single precision values 1.0 and 0.0. */
#define FP_ONE		1065353216
#define FP_ZERO		0

/* Components of the current source point. */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
/* Components of the current destination point. */
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)
/* Matrix element n lives at byte offset 4*n from EDX. */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points4_general
 *
 * Full 4x4 transform.  For j = 0..3:
 *   dst[j] = src[0]*m[j] + src[1]*m[4+j] + src[2]*m[8+j] + src[3]*m[12+j]
 *
 * Saves/restores ESI, EDI.  Uses up to eight x87 stack slots.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_general)
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME(_mesa_x86_transform_points4_general):

/* Bytes pushed below before the ARG_* operands are read. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to do for an empty vector */
	JZ( LLBL(x86_p4_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per dest point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of dest data */

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

	/* src[0] * matrix elements 0..3 start the four accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* + src[1] * matrix elements 4..7 */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* + src[2] * matrix elements 8..11 */
	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT10 )
	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT11 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* + src[3] * matrix elements 12..15 */
	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT14 )
	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT15 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* reorder so the accumulators pop off in dst[0..3] order */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FSTP_S( DST0 )			/* F6 F5 F7 */
	FXCH( ST(1) )			/* F5 F6 F7 */
	FSTP_S( DST1 )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

LLBL(x86_p4_gr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_perspective
 *
 *   dst[0] = src[0]*m[0]  + src[2]*m[8]
 *   dst[1] = src[1]*m[5]  + src[2]*m[9]
 *   dst[2] = src[2]*m[10] + src[3]*m[14]
 *   dst[3] = src[2] with its sign bit flipped (done in integer registers)
 *
 * Saves/restores ESI, EDI, EBX.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_perspective)
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME(_mesa_x86_transform_points4_perspective):

/* Bytes pushed below before the ARG_* operands are read. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to do for an empty vector */
	JZ( LLBL(x86_p4_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per dest point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of dest data */

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F0 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
	FADDP( ST0, ST(2) )		/* F6 F5 F4 */

	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
	FMUL_S( MAT14 )

	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* dst[3] = -src[2]: flip bit 31 of the raw float bits */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )/* change sign */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

LLBL(x86_p4_pr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_3d
 *
 * For j = 0..2:
 *   dst[j] = src[0]*m[j] + src[1]*m[4+j] + src[2]*m[8+j] + src[3]*m[12+j]
 *   dst[3] = src[3] (copied through EBX)
 *
 * Saves/restores ESI, EDI, EBX.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d)
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME(_mesa_x86_transform_points4_3d):

/* Bytes pushed below before the ARG_* operands are read. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to do for an empty vector */
	JZ( LLBL(x86_p4_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per dest point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of dest data */

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

	/* src[0] * matrix elements 0..2 start the three accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* + src[1] * matrix elements 4..6 */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* + src[2] * matrix elements 8..10 */
	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* + src[3] * matrix elements 12..14 */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	MOV_L( SRC3, EBX )		/* dst[3] is a plain copy of src[3] */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

LLBL(x86_p4_3dr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 *   dst[0] = src[0]*m[0]  + src[3]*m[12]
 *   dst[1] = src[1]*m[5]  + src[3]*m[13]
 *   dst[2] = src[2]*m[10] + src[3]*m[14]
 *   dst[3] = src[3] (copied through EBX)
 *
 * Saves/restores ESI, EDI, EBX.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

/* Bytes pushed below before the ARG_* operands are read. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to do for an empty vector */
	JZ( LLBL(x86_p4_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per dest point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of dest data */

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F6 F5 F4 */
	FMUL_S( MAT10 )

	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	MOV_L( SRC3, EBX )		/* dst[3] is a plain copy of src[3] */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

LLBL(x86_p4_3dnrr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_2d
 *
 *   dst[0] = src[0]*m[0] + src[1]*m[4] + src[3]*m[12]
 *   dst[1] = src[0]*m[1] + src[1]*m[5] + src[3]*m[13]
 *   dst[2] = src[2] (copied through EBX)
 *   dst[3] = src[3] (copied through EBP)
 *
 * Saves/restores ESI, EDI, EBX, EBP.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d)
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME(_mesa_x86_transform_points4_2d):

/* Bytes pushed below before the ARG_* operands are read. */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to do for an empty vector */
	JZ( LLBL(x86_p4_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per dest point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of dest data */

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* dst[2], dst[3] are plain copies; read them before any store */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

LLBL(x86_p4_2dr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 *   dst[0] = src[0]*m[0] + src[3]*m[12]
 *   dst[1] = src[1]*m[5] + src[3]*m[13]
 *   dst[2] = src[2] (copied through EBX)
 *   dst[3] = src[3] (copied through EBP)
 *
 * Saves/restores ESI, EDI, EBX, EBP.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d_no_rot)
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME(_mesa_x86_transform_points4_2d_no_rot):

/* Bytes pushed below before the ARG_* operands are read. */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to do for an empty vector */
	JZ( LLBL(x86_p4_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per dest point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of dest data */

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* dst[2], dst[3] are plain copies; read them before any store */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

LLBL(x86_p4_2dnrr_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET




/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies every source point to the destination unchanged, using integer
 * moves only; the matrix argument is never read.  The dest descriptor's
 * flags/count/size are updated first; if the source and dest data start at
 * the same address the copy loop is then skipped.
 *
 * Saves/restores ESI, EDI, EBX.  EDX is used as copy scratch.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_identity)
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME(_mesa_x86_transform_points4_identity):

/* Bytes pushed below before the ARG_* operands are read. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to do for an empty vector */
	JZ( LLBL(x86_p4_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per dest point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of dest data */

	CMP_L( ESI, EDI )		/* same data start: nothing to copy */
	JE( LLBL(x86_p4_ir_done) )

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EDX )

	MOV_L( EBX, DST2 )
	MOV_L( EDX, DST3 )

LLBL(x86_p4_ir_skip):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif