translate_sse.c revision cdc920a0
1/* 2 * Copyright 2003 Tungsten Graphics, inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
 *
 * Authors:
 *   Keith Whitwell <keithw@tungstengraphics.com>
 */


#include "pipe/p_config.h"
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"

#include "translate.h"


#if defined(PIPE_ARCH_X86)

#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"


/* Component indices used to build SHUF() shuffle masks. */
#define X    0
#define Y    1
#define Z    2
#define W    3


/* Signature of the generated "linear" routine: translates vertices
 * start .. start+count-1.
 */
typedef void (PIPE_CDECL *run_func)( struct translate *translate,
				     unsigned start,
				     unsigned count,
				     unsigned instance_id,
				     void *output_buffer);

/* Signature of the generated indexed routine: translates the vertices
 * named by the elts[] array.
 */
typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
					  const unsigned *elts,
					  unsigned count,
					  unsigned instance_id,
					  void *output_buffer);

/* One source vertex buffer as supplied through set_buffer(). */
struct translate_buffer {
   const void *base_ptr;
   unsigned stride;
};

/* A distinct (buffer, instance_divisor) combination used by the key.
 * ("varient" is this file's historical spelling of "variant".)
 */
struct translate_buffer_varient {
   unsigned buffer_index;
   unsigned instance_divisor;
   void *ptr;                    /* updated either per vertex or per instance */
};


/* Pseudo varient index meaning "the source is the instance id itself,
 * not a vertex buffer".
 */
#define ELEMENT_BUFFER_INSTANCE_ID  1001


struct translate_sse {
   struct translate translate;

   struct x86_function linear_func;
   struct x86_function elt_func;
   struct x86_function *func;       /* function currently being generated */

   /* Flags for the lazily-emitted XMM constant loads below; reset at the
    * start of each generated function.
    */
   boolean loaded_identity;
   boolean loaded_255;
   boolean loaded_inv_255;

   float identity[4];               /* {0, 0, 0, 1} */
   float float_255[4];              /* {255, 255, 255, 255} */
   float inv_255[4];                /* {1/255, 1/255, 1/255, 1/255} */

   struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
   unsigned nr_buffers;

   /* Multiple buffer varients can map to a single buffer. */
   struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
   unsigned nr_buffer_varients;

   /* Multiple elements can map to a single buffer varient. */
   unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];

   boolean use_instancing;
   unsigned instance_id;

   run_func      gen_run;
   run_elts_func gen_run_elts;

   /* these are actually known values, but putting them in a struct
    * like this is helpful to keep them in sync across the file.
    */
   struct x86_reg tmp_EAX;
   struct x86_reg idx_EBX;     /* either start+i or &elt[i] */
   struct x86_reg outbuf_ECX;
   struct x86_reg machine_EDX;
   struct x86_reg count_ESI;   /* decrements to zero */
};

/* Byte offset of member b within the object starting at a.  Used to
 * address fields of the translate_sse struct itself from generated code
 * (EDX holds the struct pointer at run time).
 */
static int get_offset( const void *a, const void *b )
{
   return (const char *)b - (const char *)a;
}



/* Return xmm6 holding {0, 0, 0, 1}.
 *
 * On first use within a generated function the constant is written into
 * p->identity[] and a movups from the machine struct is emitted; later
 * calls just return the register, so the scheme relies on generated code
 * never clobbering xmm6 between uses (likewise xmm7/xmm5 below).
 */
static struct x86_reg get_identity( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 6);

   if (!p->loaded_identity) {
      p->loaded_identity = TRUE;
      p->identity[0] = 0;
      p->identity[1] = 0;
      p->identity[2] = 0;
      p->identity[3] = 1;

      sse_movups(p->func, reg,
		 x86_make_disp(p->machine_EDX,
			       get_offset(p, &p->identity[0])));
   }

   return reg;
}

/* Return xmm7 holding {255.0} x 4, loading it lazily as above. */
static struct x86_reg get_255( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 7);

   if (!p->loaded_255) {
      p->loaded_255 = TRUE;
      p->float_255[0] =
	 p->float_255[1] =
	 p->float_255[2] =
	 p->float_255[3] = 255.0f;

      sse_movups(p->func, reg,
		 x86_make_disp(p->machine_EDX,
			       get_offset(p, &p->float_255[0])));
   }

   return reg;
}

/* Return xmm5 holding {1/255.0} x 4, loading it lazily as above. */
static struct x86_reg get_inv_255( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 5);

   if (!p->loaded_inv_255) {
      p->loaded_inv_255 = TRUE;
      p->inv_255[0] =
	 p->inv_255[1] =
	 p->inv_255[2] =
	 p->inv_255[3] = 1.0f / 255.0f;

      sse_movups(p->func, reg,
		 x86_make_disp(p->machine_EDX,
			       get_offset(p, &p->inv_255[0])));
   }

   return reg;
}


/* The emit_load_* helpers emit code that fetches one attribute from the
 * memory operand arg0 and leaves it as four floats (x, y, z, w) in the
 * XMM register 'data', filling missing components with 0,0,0,1.
 */

static void emit_load_R32G32B32A32( struct translate_sse *p,
				    struct x86_reg data,
				    struct x86_reg arg0 )
{
   sse_movups(p->func, data, arg0);
}

static void emit_load_R32G32B32( struct translate_sse *p,
				 struct x86_reg data,
				 struct x86_reg arg0 )
{
   /* Have to jump through some hoops:
    *
    * c 0 0 0
    * c 0 0 1
    * 0 0 c 1
    * a b c 1
    */
   sse_movss(p->func, data, x86_make_disp(arg0, 8));
   sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
   sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
   sse_movlps(p->func, data, arg0);
}

static void emit_load_R32G32( struct translate_sse *p,
			      struct x86_reg data,
			      struct x86_reg arg0 )
{
   /* 0 0 0 1
    * a b 0 1
    */
   sse_movups(p->func, data, get_identity(p) );
   sse_movlps(p->func, data, arg0);
}


static void emit_load_R32( struct translate_sse *p,
			   struct x86_reg data,
			   struct x86_reg arg0 )
{
   /* a 0 0 0
    * a 0 0 1
    */
   sse_movss(p->func, data, arg0);
   sse_orps(p->func, data, get_identity(p) );
}


static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
				      struct x86_reg data,
				      struct x86_reg src )
{

   /* Load and unpack twice.  The low quadword of the identity constant
    * is all zero bytes (two 0.0f floats), so each punpcklbw interleaves
    * with zeros, zero-extending bytes -> words -> dwords:
    */
   sse_movss(p->func, data, src);
   sse2_punpcklbw(p->func, data, get_identity(p));
   sse2_punpcklbw(p->func, data, get_identity(p));

   /* Convert to float:
    */
   sse2_cvtdq2ps(p->func, data, data);


   /* Scale by 1/255.0
    */
   sse_mulps(p->func, data, get_inv_255(p));
}




/* The emit_store_* helpers are the converse: they emit code that writes
 * the four floats in dataXMM out to the memory operand 'dest' in the
 * requested format.  Some are destructive on dataXMM (noted below).
 */

static void emit_store_R32G32B32A32( struct translate_sse *p,
				     struct x86_reg dest,
				     struct x86_reg dataXMM )
{
   sse_movups(p->func, dest, dataXMM);
}

static void emit_store_R32G32B32( struct translate_sse *p,
				  struct x86_reg dest,
				  struct x86_reg dataXMM )
{
   /* Emit two, shuffle, emit one.
    */
   sse_movlps(p->func, dest, dataXMM);
   sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
   sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
}

static void emit_store_R32G32( struct translate_sse *p,
			       struct x86_reg dest,
			       struct x86_reg dataXMM )
{
   sse_movlps(p->func, dest, dataXMM);
}

static void emit_store_R32( struct translate_sse *p,
			    struct x86_reg dest,
			    struct x86_reg dataXMM )
{
   sse_movss(p->func, dest, dataXMM);
}



static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
				       struct x86_reg dest,
				       struct x86_reg dataXMM )
{
   /* Scale by 255.0
    */
   sse_mulps(p->func, dataXMM, get_255(p));

   /* Pack and emit:
    */
   sse2_cvtps2dq(p->func, dataXMM, dataXMM);
   sse2_packssdw(p->func, dataXMM, dataXMM);
   sse2_packuswb(p->func, dataXMM, dataXMM);
   sse_movss(p->func, dest, dataXMM);
}





/* Extended swizzles?  Maybe later.
 */
static void emit_swizzle( struct translate_sse *p,
			  struct x86_reg dest,
			  struct x86_reg src,
			  unsigned char shuffle )
{
   sse_shufps(p->func, dest, src, shuffle);
}


/* Emit code to translate a single attribute: load from the source memory
 * operand (srcECX) into xmm0, converting to float, then convert back and
 * store to the destination memory operand (dstEAX).  The A8R8G8B8 cases
 * swizzle through SHUF(Z,Y,X,W) to swap the R/B channels relative to
 * R8G8B8A8.  Returns FALSE for any unsupported input/output format.
 */
static boolean translate_attr( struct translate_sse *p,
			       const struct translate_element *a,
			       struct x86_reg srcECX,
			       struct x86_reg dstEAX)
{
   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);

   switch (a->input_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_A8R8G8B8_UNORM:
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      break;
   default:
      return FALSE;
   }

   switch (a->output_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_store_R32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_store_R32G32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_store_R32G32B32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_store_R32G32B32A32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_A8R8G8B8_UNORM:
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   default:
      return FALSE;
   }

   return TRUE;
}


/* Emit the pre-loop setup code: compute the starting input pointer for
 * each buffer varient that needs one (all of them in the linear case,
 * only instanced ones in the indexed case) and stash it in varient->ptr
 * -- except for the single-varient linear fast path, where the pointer
 * itself is kept in EBX instead of an index.
 */
static boolean init_inputs( struct translate_sse *p,
                            boolean linear )
{
   unsigned i;
   struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
                                              get_offset(p, &p->instance_id));

   for (i = 0; i < p->nr_buffer_varients; i++) {
      struct translate_buffer_varient *varient = &p->buffer_varient[i];
      struct translate_buffer *buffer = &p->buffer[varient->buffer_index];

      if (linear || varient->instance_divisor) {
         struct x86_reg buf_stride   = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->stride));
         struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &varient->ptr));
         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->base_ptr));
         struct x86_reg elt = p->idx_EBX;
         struct x86_reg tmp_EAX = p->tmp_EAX;

         /* Calculate pointer to first attrib:
          *   base_ptr + stride * index, where index depends on instance divisor
          */
         if (varient->instance_divisor) {
            /* Our index is instance ID divided by instance divisor.
             */
            x86_mov(p->func, tmp_EAX, instance_id);

            if (varient->instance_divisor != 1) {
               struct x86_reg tmp_EDX = p->machine_EDX;
               struct x86_reg tmp_ECX = p->outbuf_ECX;

               /* TODO: Add x86_shr() to rtasm and use it whenever
                *       instance divisor is power of two.
                */

               /* DIV uses EDX:EAX implicitly, so save EDX (the machine
                * pointer) and ECX (the output pointer) around it.
                */
               x86_push(p->func, tmp_EDX);
               x86_push(p->func, tmp_ECX);
               x86_xor(p->func, tmp_EDX, tmp_EDX);
               x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
               x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */
               x86_pop(p->func, tmp_ECX);
               x86_pop(p->func, tmp_EDX);
            }
         } else {
            x86_mov(p->func, tmp_EAX, elt);
         }
         x86_imul(p->func, tmp_EAX, buf_stride);
         x86_add(p->func, tmp_EAX, buf_base_ptr);


         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (linear && p->nr_buffer_varients == 1)
            x86_mov(p->func, elt, tmp_EAX);
         else
            x86_mov(p->func, buf_ptr, tmp_EAX);
      }
   }

   return TRUE;
}


/* Emit code that yields an operand addressing the current input vertex
 * for buffer varient var_idx.  'elt' is either the running linear index
 * (EBX) or a dereference of the current element pointer.  Depending on
 * the case this is the precomputed pointer from init_inputs(), the
 * instance-id field itself, or a freshly computed base + stride * elt
 * in EAX.
 */
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
                                      boolean linear,
                                      unsigned var_idx,
                                      struct x86_reg elt )
{
   if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
      return x86_make_disp(p->machine_EDX,
                           get_offset(p, &p->instance_id));
   }
   if (linear && p->nr_buffer_varients == 1) {
      /* Fast path: EBX already holds the buffer pointer (see init_inputs). */
      return p->idx_EBX;
   }
   else if (linear || p->buffer_varient[var_idx].instance_divisor) {
      struct x86_reg ptr = p->tmp_EAX;
      struct x86_reg buf_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer_varient[var_idx].ptr));

      x86_mov(p->func, ptr, buf_ptr);
      return ptr;
   }
   else {
      struct x86_reg ptr = p->tmp_EAX;
      const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];

      struct x86_reg buf_stride =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].stride));

      struct x86_reg buf_base_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].base_ptr));



      /* Calculate pointer to current attrib:
       */
      x86_mov(p->func, ptr, buf_stride);
      x86_imul(p->func, ptr, elt);
      x86_add(p->func, ptr, buf_base_ptr);
      return ptr;
   }
}



/* Emit the bottom-of-loop code that advances the input state: bump the
 * pointer in EBX (fast path), bump each non-instanced varient pointer
 * (general linear case), or step to the next element index (indexed
 * case, 4 bytes per unsigned element).
 */
static boolean incr_inputs( struct translate_sse *p,
                            boolean linear )
{
   if (linear && p->nr_buffer_varients == 1) {
      struct x86_reg stride = x86_make_disp(p->machine_EDX,
                                            get_offset(p, &p->buffer[0].stride));

      if (p->buffer_varient[0].instance_divisor == 0) {
         x86_add(p->func, p->idx_EBX, stride);
         sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
      }
   }
   else if (linear) {
      unsigned i;

      /* Is this worthwhile??
       */
      for (i = 0; i < p->nr_buffer_varients; i++) {
         struct translate_buffer_varient *varient = &p->buffer_varient[i];
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
                                                get_offset(p, &varient->ptr));
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
                                                   get_offset(p, &p->buffer[varient->buffer_index].stride));

         if (varient->instance_divisor == 0) {
            x86_mov(p->func, p->tmp_EAX, buf_ptr);
            x86_add(p->func, p->tmp_EAX, buf_stride);
            if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
            x86_mov(p->func, buf_ptr, p->tmp_EAX);
         }
      }
   }
   else {
      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
   }

   return TRUE;
}


/* Build run( struct translate *machine,
 *            unsigned start,
 *            unsigned count,
 *            void *output_buffer )
 * or
 *  run_elts( struct translate *machine,
 *            unsigned *elts,
 *            unsigned count,
 *            void *output_buffer )
 *
 * Lots of hardcoding
 *
 * EAX -- scratch / pointer to current attribute
 * EBX -- start+i (linear) or &elt[i] (indexed)
 * ECX -- pointer to current output vertex
 * EDX -- pointer to the translate_sse machine struct
 * ESI -- remaining vertex count
 */
static boolean build_vertex_emit( struct translate_sse *p,
                                  struct x86_function *func,
                                  boolean linear )
{
   int fixup, label;
   unsigned j;

   p->tmp_EAX     = x86_make_reg(file_REG32, reg_AX);
   p->idx_EBX     = x86_make_reg(file_REG32, reg_BX);
   p->outbuf_ECX  = x86_make_reg(file_REG32, reg_CX);
   p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
   p->count_ESI   = x86_make_reg(file_REG32, reg_SI);

   p->func = func;
   p->loaded_inv_255 = FALSE;
   p->loaded_255 = FALSE;
   p->loaded_identity = FALSE;

   x86_init_func(p->func);

   /* Push a few regs?
    */
   x86_push(p->func, p->idx_EBX);
   x86_push(p->func, p->count_ESI);

   /* Load arguments into regs:
    */
   x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
   x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
   x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
   x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));

   /* Load instance ID.
    */
   if (p->use_instancing) {
      x86_mov(p->func,
              p->tmp_EAX,
              x86_fn_arg(p->func, 4));
      x86_mov(p->func,
              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
              p->tmp_EAX);
   }

   /* Get vertex count, compare to zero
    */
   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
   x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
   fixup = x86_jcc_forward(p->func, cc_E);

   /* always load, needed or not:
    */
   init_inputs(p, linear);

   /* Note address for loop jump
    */
   label = x86_get_label(p->func);
   {
      struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
      int last_varient = -1;
      struct x86_reg vb;

      for (j = 0; j < p->translate.key.nr_elements; j++) {
         const struct translate_element *a = &p->translate.key.element[j];
         unsigned varient = p->element_to_buffer_varient[j];

         /* Figure out source pointer address.  Elements sharing a
          * varient reuse the pointer computed for the previous one:
          */
         if (varient != last_varient) {
            last_varient = varient;
            vb = get_buffer_ptr(p, linear, varient, elt);
         }

         if (!translate_attr( p, a,
                              x86_make_disp(vb, a->input_offset),
                              x86_make_disp(p->outbuf_ECX, a->output_offset)))
            return FALSE;
      }

      /* Next output vertex:
       */
      x86_lea(p->func,
              p->outbuf_ECX,
              x86_make_disp(p->outbuf_ECX,
                            p->translate.key.output_stride));

      /* Incr index
       */
      incr_inputs( p, linear );
   }

   /* decr count, loop if not zero
    */
   x86_dec(p->func, p->count_ESI);
   x86_jcc(p->func, cc_NZ, label);

   /* Exit mmx state?
    */
   if (p->func->need_emms)
      mmx_emms(p->func);

   /* Land forward jump here:
    */
   x86_fixup_fwd_jump(p->func, fixup);

   /* Pop regs and return
    */

   x86_pop(p->func, p->count_ESI);
   x86_pop(p->func, p->idx_EBX);
   x86_ret(p->func);

   return TRUE;
}



/* translate::set_buffer -- record the application-supplied vertex
 * buffer pointer and stride; the generated code reads these through
 * the machine struct on every run.
 */
static void translate_sse_set_buffer( struct translate *translate,
                                      unsigned buf,
                                      const void *ptr,
                                      unsigned stride )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   if (buf < p->nr_buffers) {
      p->buffer[buf].base_ptr = (char *)ptr;
      p->buffer[buf].stride = stride;
   }

   if (0) debug_printf("%s %d/%d: %p %d\n",
                       __FUNCTION__, buf,
                       p->nr_buffers,
                       ptr, stride);
}


/* translate::release -- free both generated functions and the machine. */
static void translate_sse_release( struct translate *translate )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   x86_release_func( &p->linear_func );
   x86_release_func( &p->elt_func );

   FREE(p);
}

/* translate::run_elts -- thin trampoline into the generated indexed code. */
static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
                                               const unsigned *elts,
                                               unsigned count,
                                               unsigned instance_id,
                                               void *output_buffer )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   p->gen_run_elts( translate,
                    elts,
                    count,
                    instance_id,
                    output_buffer);
}

/* translate::run -- thin trampoline into the generated linear code. */
static void PIPE_CDECL translate_sse_run( struct translate *translate,
                                          unsigned start,
                                          unsigned count,
                                          unsigned instance_id,
                                          void *output_buffer )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   p->gen_run( translate,
               start,
               count,
               instance_id,
               output_buffer);
}


/* Create an SSE2-based translate for the given key, or return NULL if
 * the CPU lacks SSE/SSE2 or code generation fails (callers then fall
 * back to the generic implementation).
 */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   struct translate_sse *p = NULL;
   unsigned i;

   if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
      goto fail;

   p = CALLOC_STRUCT( translate_sse );
   if (p == NULL)
      goto fail;

   p->translate.key = *key;
   p->translate.release = translate_sse_release;
   p->translate.set_buffer = translate_sse_set_buffer;
   p->translate.run_elts = translate_sse_run_elts;
   p->translate.run = translate_sse_run;

   for (i = 0; i < key->nr_elements; i++) {
      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
         unsigned j;

         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);

         if (key->element[i].instance_divisor) {
            p->use_instancing = TRUE;
         }

         /*
          * Map vertex element to vertex buffer varient.
          */
         for (j = 0; j < p->nr_buffer_varients; j++) {
            if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
                p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
               break;
            }
         }
         if (j == p->nr_buffer_varients) {
            p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
            p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
            p->nr_buffer_varients++;
         }
         p->element_to_buffer_varient[i] = j;
      } else {
         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);

         p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
      }
   }

   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);

   if (!build_vertex_emit(p, &p->linear_func, TRUE))
      goto fail;

   if (!build_vertex_emit(p, &p->elt_func, FALSE))
      goto fail;

   p->gen_run = (run_func)x86_get_func(&p->linear_func);
   if (p->gen_run == NULL)
      goto fail;

   p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
   if (p->gen_run_elts == NULL)
      goto fail;

   return &p->translate;

 fail:
   if (p)
      translate_sse_release( &p->translate );

   return NULL;
}



#else

/* Non-x86 build: no SSE translate available. */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   return NULL;
}

#endif