/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

/* SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function */
#define LP_MAX_NUM_FUNCS 33

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on
 * every TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
                 tgsi_file_name(file),
                 index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}
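
/*
 * Illustrative output of the above when DEBUG_EXECUTION is enabled: one
 * such line is printed per destination channel written. For a hypothetical
 * 4-wide vector (values made up) it looks roughly like:
 *
 *    TEMP[0].x = 1.0 2.0 3.0 4.0
 */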

/*
 * Return the context for the current function.
 * (This is always 'main' if the shader doesn't make any function calls.)
 */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Returns true if we're in a loop.
 * The check is global: it returns true even if there's no loop inside the
 * current function, as long as we were inside a loop in one of the calling
 * functions.
 */
static inline boolean
mask_has_loop(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->loop_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a switch statement.
 * The check is global: it returns true even if there's no switch in the
 * current function, as long as we were inside a switch in one of the
 * calling functions.
 */
static inline boolean
mask_has_switch(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->switch_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a conditional.
 * The check is global: it returns true even if there's no conditional in
 * the current function, as long as we were inside a conditional in one of
 * the calling functions.
 */
static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Initialize a function context at the specified index.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}

static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /* For loops we need to update the entire mask at runtime. */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
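
/*
 * Illustrative composition performed by lp_exec_mask_update() above,
 * assuming a 4-wide vector (mask values made up):
 *
 *    cond_mask  = { ~0,  0, ~0, ~0 }   lanes 0, 2, 3 took the "if"
 *    break_mask = { ~0, ~0,  0, ~0 }   lane 2 has executed BRK
 *    cont_mask  = { ~0, ~0, ~0,  0 }   lane 3 has executed CONT
 *    -------------------------------------------------------------
 *    exec_mask  = { ~0,  0,  0,  0 }   AND of all of the above
 *
 * Only lane 0 still commits stores; the results of the other lanes are
 * discarded by lp_exec_mask_store() below.
 */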

static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
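
/*
 * How a TGSI conditional maps onto the three helpers above (a sketch,
 * not the actual emit code):
 *
 *    IF src  -> lp_exec_mask_cond_push(mask, cond)
 *                  ; cond_mask &= condition
 *    ELSE    -> lp_exec_mask_cond_invert(mask)
 *                  ; cond_mask = ~cond_mask & previous cond_mask
 *    ENDIF   -> lp_exec_mask_cond_pop(mask)
 *                  ; cond_mask = previous cond_mask
 *
 * Both branches are always emitted; the mask only decides which lanes
 * get to commit their stores.
 */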

static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}

static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);

      if (ctx->switch_in_default) {
         /*
          * Stop default execution, but only if this is an unconditional
          * break. (The condition here is not perfect, since dead code
          * after a break is allowed, but it should be sufficient: false
          * negatives just mean unoptimized code, so we don't have to
          * pre-evaluate that.)
          */
         if (break_always && ctx->switch_pc) {
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop.
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations.
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
                                           ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
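
/*
 * Rough shape of the code emitted for a TGSI loop by lp_exec_bgnloop()
 * and lp_exec_endloop() above (a sketch, assuming no nesting):
 *
 *       store break_mask -> break_var
 *    bgnloop:
 *       break_mask = load break_var
 *       ... loop body with masked stores ...
 *       store break_mask -> break_var
 *       limiter = limiter - 1
 *       if ((exec_mask != 0) && (limiter > 0)) goto bgnloop
 *    endloop:
 *
 * The limiter guarantees that a runaway loop still terminates after
 * LP_MAX_TGSI_LOOP_ITERATIONS iterations, even if exec_mask never
 * becomes zero.
 */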

static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* Check if there's a deferred default; if so, execute it now. */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * Re-purpose switch_pc to point back here, since we stop execution of
       * the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }
   else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * Skipping the case mask evaluation here is NOT optional (not in all
    * cases anyway).
    */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
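
/*
 * Worked example of the case handling above, for a hypothetical
 * "SWITCH s ... CASE 2" with per-lane s = { 2, 0, 2, 1 }:
 *
 *    casemask            = (s == 2)          = { ~0, 0, ~0, 0 }
 *    switch_mask_default |= casemask           (lanes already matched)
 *    switch_mask         = (casemask | old switch_mask) & prevmask
 *
 * ORing with the old switch_mask implements fallthrough from a
 * preceding case; ANDing with prevmask keeps the switch confined to
 * lanes that were active outside of it.
 */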

/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* Skip over case statements which are together with default. */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      default:
         ; /* nothing */
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}

static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that), everything
    * is just fine: update the switch mask and go on. This means we can
    * handle default with fallthrough INTO it without overhead, if it is
    * last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, a "case" immediately before default isn't really a
       * fallthrough, however we still have to count it as such, as we have
       * already updated the masks. If that turns out to happen a lot in
       * practice, we could add a switch optimizer pass which gets rid of
       * all case statements appearing together with default (or do the
       * switch analysis at switch start time instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into
       * it, we record the PC and continue execution at the next case
       * (again, cases encountered at the same time don't count). At
       * endswitch time, we update the switch mask and go back to execute
       * the code we skipped, until the next break (possibly re-executing
       * some code with a changed mask if there was a fallthrough out of
       * default). Finally, if it is not the last statement and there was a
       * fallthrough into it, do the same as in the former case, except
       * instead of skipping the code just execute it without updating the
       * mask, then go back and re-execute it.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
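
/*
 * Timeline sketch for a deferred default (not last, no fallthrough into
 * it), as handled above and in lp_exec_endswitch():
 *
 *    DEFAULT    -> record its PC in switch_pc, continue translating
 *                  at the next CASE (skipping the default body)
 *    ENDSWITCH  -> compute the default mask and reset the translation
 *                  PC back to switch_pc
 *    ...default body is now translated with the default mask...
 *    BRK        -> unconditional break: move the translation PC
 *                  forward to ENDSWITCH again and finish the switch
 *
 * Note this re-orders *translation*, not runtime control flow: the
 * default body's code is simply emitted after the other cases.
 */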

/*
 * Stores val into the address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which elements of val
 * should be stored to the address
 * (0 means don't store this element, ~0 means do store it).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
          LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);

   if (exec_mask) {
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, exec_mask, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
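
/*
 * Illustrative effect of lp_exec_mask_store() above for a 4-wide vector
 * (values made up):
 *
 *    exec_mask = {  ~0,   0,  ~0,   0 }
 *    val       = { 1.0, 2.0, 3.0, 4.0 }
 *    *dst_ptr  = { 9.0, 9.0, 9.0, 9.0 }   before
 *    *dst_ptr  = { 1.0, 9.0, 3.0, 9.0 }   after
 *
 * Note this is a load/select/store sequence rather than a real masked
 * store instruction, so the full destination is read and written back.
 */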

static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif).
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
         gep[1] = lindex;
         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
      } else {
         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
      }
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}
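
/*
 * Layout sketch for the indirect-addressable files handled by
 * get_file_ptr() above: each register channel is one whole vector,
 * flattened as reg[index].chan -> element (index * 4 + chan) of the
 * array. E.g. a hypothetical TEMP[2].z lives at element
 * 2 * 4 + 2 == 10 of temps_array.
 */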

/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs, copy our alloca array
 * to the output slots specified by the caller, to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
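
/*
 * Shape of the gather emitted above, for a 4-wide vector without
 * indexes2 (illustrative):
 *
 *    res[i] = base_ptr[indexes[i]]      for i = 0..3
 *
 * With indexes2 (64-bit fetches) the result is twice as long; even
 * elements take their index from 'indexes' and odd elements from
 * 'indexes2'.
 */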

/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg,
                   int index_limit)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection,
       * the value actually stored is expected to be an integer. */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      assert(index_limit >= 0);
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type, index_limit);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
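
/*
 * Example of the computation above (hypothetical values): for
 * TEMP[3 + ADDR[0].x] with per-lane ADDR[0].x = { 1, 0, 2, 1 }, the
 * returned vector is { 4, 3, 5, 4 }, then clamped per lane to
 * index_limit for files other than constant buffers.
 */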

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
      bld_fetch = &bld_base->dbl_bld;
      break;
   case TGSI_TYPE_UNSIGNED64:
      bld_fetch = &bld_base->uint64_bld;
      break;
   case TGSI_TYPE_SIGNED64:
      bld_fetch = &bld_base->int64_bld;
      break;
   case TGSI_TYPE_VOID:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
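
/*
 * Worked instance of the formula above, assuming a 4-wide vector
 * (type.length == 4): for indirect_index = { 2, 2, 2, 2 } and
 * chan_index == 1 (the y channel),
 *
 *    index_vec = (2 * 4 + 1) * 4 + { 0, 1, 2, 3 } = { 36, 37, 38, 39 }
 *
 * i.e. the four lanes of TEMP[2].y in the flattened SoA array (with
 * the per-element offsets enabled).
 */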

static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {

         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
         if (stype == TGSI_TYPE_DOUBLE) {
            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
            bld_broad = &bld_base->dbl_bld;
         } else if (stype == TGSI_TYPE_UNSIGNED64) {
            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
            bld_broad = &bld_base->uint64_bld;
         } else if (stype == TGSI_TYPE_SIGNED64) {
            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
            bld_broad = &bld_base->int64_bld;
         }
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         res = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
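
/*
 * Note on the swizzle_in encoding used by emit_fetch_constant() above
 * and the other emit_fetch_* functions below: the low 16 bits select
 * the (first) channel; for 64-bit types the high 16 bits carry the
 * second channel. E.g. a DOUBLE straddling .xy would be fetched with
 * swizzle_in == (1 << 16) | 0.
 */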

/**
 * Fetch a 64-bit value from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to vec_length 64-bit values.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
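
/*
 * Illustrative shuffle performed by emit_fetch_64bit() above for a
 * 4-wide vector: input = { x0, x1, x2, x3 } and
 * input2 = { y0, y1, y2, y3 } are interleaved with shuffle indices
 * { 0, 4, 1, 5, 2, 6, 3, 7 } into
 *
 *    { x0, y0, x1, y1, x2, y2, x3, y3 }
 *
 * which is then bitcast to four 64-bit values.
 */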

static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants), but all elements are the
          * same in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value, not a regular input. */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype;  /* actual type of the value */

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}

/**
 * Register fetch with derivatives.
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef src,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   if (res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if (ddx)
      *ddx = lp_build_ddx(&bld->bld_base.base, src);

   if (ddy)
      *ddy = lp_build_ddy(&bld->bld_base.base, src);
}
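
/*
 * Minimal sketch of what the derivative fetch above produces, assuming
 * the usual 2x2 quad layout of fragments within the vector (see
 * lp_bld_quad.h): for a quad
 *
 *    a b
 *    c d
 *
 * lp_build_ddx() yields differences along x (b - a) and lp_build_ddy()
 * differences along y (c - a), replicated across the quad.
 */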

/**
 * Store an array of vec_length 64-bit values into two arrays of
 * vec_length floats.
 * I.e. value is d0, d1, d2, d3 etc.; each 64-bit value has high and low
 * pieces x, y, so it gets stored into the separate channels as:
 *    chan_ptr  = d0.x, d1.x, d2.x, d3.x
 *    chan_ptr2 = d0.y, d1.y, d2.y, d3.y
 */
static void
emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;
   unsigned i;
   LLVMValueRef temp, temp2;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];

   for (i = 0; i < bld_base->base.type.length; i++) {
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   }

   temp = LLVMBuildShuffleVector(builder, value,
                                 LLVMGetUndef(LLVMTypeOf(value)),
                                 LLVMConstVector(shuffles,
                                                 bld_base->base.type.length),
                                 "");
   temp2 = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles2,
                                                  bld_base->base.type.length),
                                  "");

   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
}
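
/*
 * Illustrative split performed by emit_store_64bit_chan() above for a
 * 4-wide vector: value = { x0, y0, x1, y1, x2, y2, x3, y3 } (viewed as
 * floats) is deinterleaved with indices { 0, 2, 4, 6 } and
 * { 1, 3, 5, 7 } into
 *
 *    temp  = { x0, x1, x2, x3 }   -> chan_ptr
 *    temp2 = { y0, y1, y2, y3 }   -> chan_ptr2
 *
 * both stored through the current execution mask.
 */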
1754 * each 64-bit value has high and low 32-bit pieces x and y,
1755 * so they get stored into the separate channels as:
1756 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1757 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1758 */
1759static void
1760emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1761                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1762                      LLVMValueRef value)
1763{
1764   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1765   struct gallivm_state *gallivm = bld_base->base.gallivm;
1766   LLVMBuilderRef builder = gallivm->builder;
1767   struct lp_build_context *float_bld = &bld_base->base;
1768   unsigned i;
1769   LLVMValueRef temp, temp2;
1770   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1771   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1772
1773   for (i = 0; i < bld_base->base.type.length; i++) {
1774      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1775      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1776   }
1777
1778   temp = LLVMBuildShuffleVector(builder, value,
1779                                 LLVMGetUndef(LLVMTypeOf(value)),
1780                                 LLVMConstVector(shuffles,
1781                                                 bld_base->base.type.length),
1782                                 "");
1783   temp2 = LLVMBuildShuffleVector(builder, value,
1784                                  LLVMGetUndef(LLVMTypeOf(value)),
1785                                  LLVMConstVector(shuffles2,
1786                                                  bld_base->base.type.length),
1787                                  "");
1788
1789   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1790   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1791}
1792
1793/**
1794 * Register store.
1795 */
1796static void
1797emit_store_chan(
1798   struct lp_build_tgsi_context *bld_base,
1799   const struct tgsi_full_instruction *inst,
1800   unsigned index,
1801   unsigned chan_index,
1802   LLVMValueRef value)
1803{
1804   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1805   struct gallivm_state *gallivm = bld_base->base.gallivm;
1806   LLVMBuilderRef builder = gallivm->builder;
1807   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1808   struct lp_build_context *float_bld = &bld_base->base;
1809   struct lp_build_context *int_bld = &bld_base->int_bld;
1810   LLVMValueRef indirect_index = NULL;
1811   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1812
1813   /*
1814    * Apply saturation.
1815    *
1816    * The value is always assumed to be float.
1817    */
1818   if (inst->Instruction.Saturate) {
1819      assert(dtype == TGSI_TYPE_FLOAT ||
1820             dtype == TGSI_TYPE_UNTYPED);
1821      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1822      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1823   }
1824
1825   if (reg->Register.Indirect) {
1826      /*
1827       * Currently mesa/st doesn't generate indirect stores
1828       * to 64-bit values; it normally uses MOV to do indirect stores.
1829       */
1830      assert(!tgsi_type_is_64bit(dtype));
1831      indirect_index = get_indirect_index(bld,
1832                                          reg->Register.File,
1833                                          reg->Register.Index,
1834                                          &reg->Indirect,
1835                                          bld->bld_base.info->file_max[reg->Register.File]);
1836   } else {
1837      assert(reg->Register.Index <=
1838             bld_base->info->file_max[reg->Register.File]);
1839   }
1840
1841   if (DEBUG_EXECUTION) {
1842      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1843   }
1844
1845   switch( reg->Register.File ) {
1846   case TGSI_FILE_OUTPUT:
1847      /* Outputs are always stored as floats */
1848      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1849
1850      if (reg->Register.Indirect) {
1851         LLVMValueRef index_vec;  /* indexes into the output registers */
1852         LLVMValueRef outputs_array;
1853         LLVMTypeRef fptr_type;
1854
1855         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1856                                           indirect_index,
1857                                           chan_index,
1858                                           TRUE);
1859
1860         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1861         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1862
1863         /* Scatter store values into output registers */
1864         emit_mask_scatter(bld, outputs_array, index_vec, value,
1865                           &bld->exec_mask);
1866      }
1867      else {
1868         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1869                                                  chan_index);
1870
1871         if (tgsi_type_is_64bit(dtype)) {
1872            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1873                                                      chan_index + 1);
1874            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1875                                  value);
1876         } else
1877            lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1878      }
1879      break;
1880
1881   case TGSI_FILE_TEMPORARY:
1882      /* Temporaries are always stored as floats */
1883      if (!tgsi_type_is_64bit(dtype))
1884         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1885      else
1886         value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1887
1888      if (reg->Register.Indirect) {
1889         LLVMValueRef index_vec;  /* indexes into the temp registers */
1890         LLVMValueRef temps_array;
1891         LLVMTypeRef fptr_type;
1892
1893         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1894                                           indirect_index,
1895                                           chan_index,
1896                                           TRUE);
1897
1898         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1899         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1900
1901         /* Scatter store values into temp registers */
1902         emit_mask_scatter(bld, temps_array, index_vec, value,
1903                           &bld->exec_mask);
1904      }
1905      else {
1906         LLVMValueRef temp_ptr;
1907         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1908
1909         if (tgsi_type_is_64bit(dtype)) {
1910            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1911                                                         reg->Register.Index,
1912                                                         chan_index + 1);
1913            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1914                                  value);
1915         }
1916         else
1917            lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1918      }
1919      break;
1920
1921   case TGSI_FILE_ADDRESS:
1922      assert(dtype == TGSI_TYPE_SIGNED);
1923      assert(LLVMTypeOf(value) == int_bld->vec_type);
1924      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1925      lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1926                         bld->addr[reg->Register.Index][chan_index]);
1927      break;
1928
1929   default:
1930      assert( 0 );
1931   }
1932
1933   (void)dtype;
1934}
1935
1936/*
1937 * Called at the beginning of the translation of each TGSI instruction, to
1938 * emit some debug code.
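 *
 * With DEBUG_EXECUTION enabled, the generated shader prints, per TGSI
 * instruction, a line looking roughly like
 *    $ 1: MOV TEMP[0].x, IN[0].xxxx
 * (note the "$ " prefix set up below), followed by the current execution
 * mask, if any, and the register values dumped from emit_store_chan().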
1939 */
1940static void
1941emit_debug(
1942   struct lp_build_tgsi_context * bld_base,
1943   const struct tgsi_full_instruction * inst,
1944   const struct tgsi_opcode_info * info)
1945
1946{
1947   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1948
1949   if (DEBUG_EXECUTION) {
1950      /*
1951       * Dump the TGSI instruction.
1952       */
1953
1954      struct gallivm_state *gallivm = bld_base->base.gallivm;
1955      char buf[512];
1956      buf[0] = '$';
1957      buf[1] = ' ';
1958      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1959      lp_build_printf(gallivm, buf);
1960
1961      /* Dump the execution mask.
1962       */
1963      if (bld->exec_mask.has_mask) {
1964         lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1965      }
1966   }
1967}
1968
1969static void
1970emit_store(
1971   struct lp_build_tgsi_context * bld_base,
1972   const struct tgsi_full_instruction * inst,
1973   const struct tgsi_opcode_info * info,
1974   unsigned index,
1975   LLVMValueRef dst[4])
1976
1977{
1978   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1979
1980   unsigned writemask = inst->Dst[index].Register.WriteMask;
1981   while (writemask) {
1982      unsigned chan_index = u_bit_scan(&writemask);
1983      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1984         continue;
1985      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1986   }
1987}
1988
1989static unsigned
1990tgsi_to_pipe_tex_target(unsigned tgsi_target)
1991{
1992   switch (tgsi_target) {
1993   case TGSI_TEXTURE_BUFFER:
1994      return PIPE_BUFFER;
1995   case TGSI_TEXTURE_1D:
1996   case TGSI_TEXTURE_SHADOW1D:
1997      return PIPE_TEXTURE_1D;
1998   case TGSI_TEXTURE_2D:
1999   case TGSI_TEXTURE_SHADOW2D:
2000   case TGSI_TEXTURE_2D_MSAA:
2001      return PIPE_TEXTURE_2D;
2002   case TGSI_TEXTURE_3D:
2003      return PIPE_TEXTURE_3D;
2004   case TGSI_TEXTURE_CUBE:
2005   case TGSI_TEXTURE_SHADOWCUBE:
2006      return PIPE_TEXTURE_CUBE;
2007   case TGSI_TEXTURE_RECT:
2008   case TGSI_TEXTURE_SHADOWRECT:
2009      return PIPE_TEXTURE_RECT;
2010   case TGSI_TEXTURE_1D_ARRAY:
2011   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2012      return PIPE_TEXTURE_1D_ARRAY;
2013   case TGSI_TEXTURE_2D_ARRAY:
2014   case TGSI_TEXTURE_SHADOW2D_ARRAY:
2015   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2016      return PIPE_TEXTURE_2D_ARRAY;
2017   case TGSI_TEXTURE_CUBE_ARRAY:
2018   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2019      return PIPE_TEXTURE_CUBE_ARRAY;
2020   default:
2021      assert(0);
2022      return PIPE_BUFFER;
2023   }
2024}
2025
2026
2027static enum lp_sampler_lod_property
2028lp_build_lod_property(
2029   struct lp_build_tgsi_context *bld_base,
2030   const struct tgsi_full_instruction *inst,
2031   unsigned src_op)
2032{
2033   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2034   enum lp_sampler_lod_property lod_property;
2035
2036   /*
2037    * Not much we can do here. We could try catching inputs declared
2038    * with constant interpolation, but it's unclear that's worth it: for
2039    * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2040    * the coords, so it could only help SAMPLE/TXQ/SVIEWINFO, just
2041    * like the constant/immediate recognition below.
2042    * What would be more valuable is recognizing temps holding
2043    * broadcast scalars, but we have no way to do that.
2044    * Tried asking llvm, without any success (using LLVMIsConstant,
2045    * even though that isn't exactly what we'd need); even a case as simple as
2046    * IMM[0] UINT32 (0,-1,0,0)
2047    * MOV TEMP[0] IMM[0].yyyy
2048    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2049    * doesn't work.
2050    * This means there's ZERO chance this will ever catch a scalar lod
2051    * with traditional tex opcodes as well as texel fetches, since the lod
2052    * comes from the same reg as coords (except some test shaders using
2053    * constant coords maybe).
2054    * There's at least hope for sample opcodes as well as size queries.
2055    */
2056   if (reg->Register.File == TGSI_FILE_CONSTANT ||
2057       reg->Register.File == TGSI_FILE_IMMEDIATE) {
2058      lod_property = LP_SAMPLER_LOD_SCALAR;
2059   }
2060   else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2061      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2062         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2063      }
2064      else {
2065         lod_property = LP_SAMPLER_LOD_PER_QUAD;
2066      }
2067   }
2068   else {
2069      /* Never use scalar (per-quad) lod; the results are just too wrong. */
2070      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2071   }
2072   return lod_property;
2073}
2074
2075
2076/**
2077 * High-level instruction translators.
2078 */
2079
2080static void
2081emit_tex( struct lp_build_tgsi_soa_context *bld,
2082          const struct tgsi_full_instruction *inst,
2083          enum lp_build_tex_modifier modifier,
2084          LLVMValueRef *texel,
2085          unsigned sampler_reg,
2086          enum lp_sampler_op_type sampler_op)
2087{
2088   unsigned unit = inst->Src[sampler_reg].Register.Index;
2089   LLVMValueRef oow = NULL;
2090   LLVMValueRef lod = NULL;
2091   LLVMValueRef coords[5];
2092   LLVMValueRef offsets[3] = { NULL };
2093   struct lp_derivatives derivs;
2094   struct lp_sampler_params params;
2095   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2096   unsigned num_derivs, num_offsets, i;
2097   unsigned shadow_coord = 0;
2098   unsigned layer_coord = 0;
2099   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2100
2101   memset(&params, 0, sizeof(params));
2102
2103   if (!bld->sampler) {
2104      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2105      for (i = 0; i < 4; i++) {
2106         texel[i] = bld->bld_base.base.undef;
2107      }
2108      return;
2109   }
2110
2111   switch (inst->Texture.Texture) {
2112   case TGSI_TEXTURE_1D_ARRAY:
2113      layer_coord = 1;
2114      /* fallthrough */
2115   case TGSI_TEXTURE_1D:
2116      num_offsets = 1;
2117      num_derivs = 1;
2118      break;
2119   case TGSI_TEXTURE_2D_ARRAY:
2120      layer_coord = 2;
2121      /* fallthrough */
2122   case TGSI_TEXTURE_2D:
2123   case TGSI_TEXTURE_RECT:
2124      num_offsets = 2;
2125      num_derivs = 2;
2126      break;
2127   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2128      layer_coord = 1;
2129      /* fallthrough */
2130   case TGSI_TEXTURE_SHADOW1D:
2131      shadow_coord = 2;
2132      num_offsets = 1;
2133      num_derivs = 1;
2134      break;
2135   case TGSI_TEXTURE_SHADOW2D_ARRAY:
2136      layer_coord = 2;
2137      shadow_coord = 3;
2138      num_offsets = 2;
2139      num_derivs = 2;
2140      break;
2141   case TGSI_TEXTURE_SHADOW2D:
2142   case TGSI_TEXTURE_SHADOWRECT:
2143      shadow_coord = 2;
2144      num_offsets = 2;
2145      num_derivs = 2;
2146      break;
2147   case TGSI_TEXTURE_CUBE:
2148      num_offsets = 2;
2149      num_derivs = 3;
2150      break;
2151   case TGSI_TEXTURE_3D:
2152      num_offsets = 3;
2153      num_derivs = 3;
2154      break;
2155   case TGSI_TEXTURE_SHADOWCUBE:
2156      shadow_coord = 3;
2157      num_offsets = 2;
2158      num_derivs = 3;
2159      break;
2160   case TGSI_TEXTURE_CUBE_ARRAY:
2161      num_offsets = 2;
2162      num_derivs = 3;
2163      layer_coord = 3;
2164      break;
2165   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2166      num_offsets = 2;
2167      num_derivs = 3;
2168      layer_coord = 3;
2169      shadow_coord = 4; /* shadow coord special different reg */
2170      break;
2171   case TGSI_TEXTURE_2D_MSAA:
2172   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2173   default:
2174      assert(0);
2175      return;
2176   }
2177
2178   /* Note lod and especially projected are illegal in a LOT of cases */
2179   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2180       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2181      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2182          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2183         /* note that shadow cube array with bias/explicit lod does not exist */
2184         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2185      }
2186      else {
2187         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2188      }
2189      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2190         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2191      }
2192      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2193         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2194      }
2195      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2196   }
2197
2198   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2199      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2200      oow = lp_build_rcp(&bld->bld_base.base, oow);
2201   }
2202
2203   for (i = 0; i < num_derivs; i++) {
2204      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2205      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2206         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2207   }
2208   for (i = num_derivs; i < 5; i++) {
2209      coords[i] = bld->bld_base.base.undef;
2210   }
2211
2212   /* Layer coord always goes into 3rd slot, except for cube map arrays */
2213   if (layer_coord) {
2214      if (layer_coord == 3) {
2215         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2216      }
2217      else {
2218         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2219      }
2220      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2221         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2222   }
2223   /* The shadow coord always occupies the 5th slot. */
2224   if (shadow_coord) {
2225      sample_key |= LP_SAMPLER_SHADOW;
2226      if (shadow_coord == 4) {
2227         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2228      }
2229      else {
2230         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2231      }
2232      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2233         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2234   }
2235
2236   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2237      unsigned dim;
2238      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2239      for (dim = 0; dim < num_derivs; ++dim) {
2240         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2241         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2242      }
2243      params.derivs = &derivs;
2244      /*
2245       * We could also check all src regs for constants, but I doubt
2246       * such cases exist in practice.
2247       */
2248      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2249         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2250            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2251         }
2252         else {
2253            lod_property = LP_SAMPLER_LOD_PER_QUAD;
2254         }
2255      }
2256      else {
2257         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2258      }
2259   }
2260   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2261
2262   /* we don't handle the 4 offset version of tg4 */
2263   if (inst->Texture.NumOffsets == 1) {
2264      unsigned dim;
2265      sample_key |= LP_SAMPLER_OFFSETS;
2266      for (dim = 0; dim < num_offsets; dim++) {
2267         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2268      }
2269   }
2270
2271   params.type = bld->bld_base.base.type;
2272   params.sample_key = sample_key;
2273   params.texture_index = unit;
2274   params.sampler_index = unit;
2275   params.context_ptr = bld->context_ptr;
2276   params.thread_data_ptr = bld->thread_data_ptr;
2277   params.coords = coords;
2278   params.offsets = offsets;
2279   params.lod = lod;
2280   params.texel = texel;
2281
2282   bld->sampler->emit_tex_sample(bld->sampler,
2283                                 bld->bld_base.base.gallivm,
2284                                 &params);
2285}
2286
2287static void
2288emit_sample(struct lp_build_tgsi_soa_context *bld,
2289            const struct tgsi_full_instruction *inst,
2290            enum lp_build_tex_modifier modifier,
2291            boolean compare,
2292            enum lp_sampler_op_type sample_type,
2293            LLVMValueRef *texel)
2294{
2295   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2296   unsigned texture_unit, sampler_unit;
2297   LLVMValueRef lod = NULL;
2298   LLVMValueRef coords[5];
2299   LLVMValueRef offsets[3] = { NULL };
2300   struct lp_derivatives derivs;
2301   struct lp_sampler_params params;
2302   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2303
2304   unsigned num_offsets, num_derivs, i;
2305   unsigned layer_coord = 0;
2306   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2307
2308   memset(&params, 0, sizeof(params));
2309
2310   if (!bld->sampler) {
2311      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2312      for (i = 0; i < 4; i++) {
2313         texel[i] = bld->bld_base.base.undef;
2314      }
2315      return;
2316   }
2317
2318   /*
2319    * unlike old-style tex opcodes the texture/sampler indices
2320    * always come from src1 and src2 respectively.
2321    */
2322   texture_unit = inst->Src[1].Register.Index;
2323   sampler_unit = inst->Src[2].Register.Index;
2324
2325   /*
2326    * Note inst->Texture.Texture will contain the number of offsets,
2327    * however the target information is NOT there and comes from the
2328    * declared sampler views instead.
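    * (bld->sv[] is filled in from the TGSI_FILE_SAMPLER_VIEW case of
    * lp_emit_declaration_soa() further below.)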
2329    */
2330   switch (bld->sv[texture_unit].Resource) {
2331   case TGSI_TEXTURE_1D:
2332      num_offsets = 1;
2333      num_derivs = 1;
2334      break;
2335   case TGSI_TEXTURE_1D_ARRAY:
2336      layer_coord = 1;
2337      num_offsets = 1;
2338      num_derivs = 1;
2339      break;
2340   case TGSI_TEXTURE_2D:
2341   case TGSI_TEXTURE_RECT:
2342      num_offsets = 2;
2343      num_derivs = 2;
2344      break;
2345   case TGSI_TEXTURE_2D_ARRAY:
2346      layer_coord = 2;
2347      num_offsets = 2;
2348      num_derivs = 2;
2349      break;
2350   case TGSI_TEXTURE_CUBE:
2351      num_offsets = 2;
2352      num_derivs = 3;
2353      break;
2354   case TGSI_TEXTURE_3D:
2355      num_offsets = 3;
2356      num_derivs = 3;
2357      break;
2358   case TGSI_TEXTURE_CUBE_ARRAY:
2359      layer_coord = 3;
2360      num_offsets = 2;
2361      num_derivs = 3;
2362      break;
2363   default:
2364      assert(0);
2365      return;
2366   }
2367
2368   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2369       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2370      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2371      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2372         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2373      }
2374      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2375         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2376      }
2377      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2378   }
2379   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2380      /* XXX might be better to explicitly pass the level zero information */
2381      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2382      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2383   }
2384
2385   for (i = 0; i < num_derivs; i++) {
2386      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2387   }
2388   for (i = num_derivs; i < 5; i++) {
2389      coords[i] = bld->bld_base.base.undef;
2390   }
2391
2392   /* Layer coord always goes into 3rd slot, except for cube map arrays */
2393   if (layer_coord) {
2394      if (layer_coord == 3)
2395         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2396      else
2397         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2398   }
2399   /* The shadow coord always occupies the 5th slot. */
2400   if (compare) {
2401      sample_key |= LP_SAMPLER_SHADOW;
2402      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2403   }
2404
2405   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2406      unsigned dim;
2407      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2408      for (dim = 0; dim < num_derivs; ++dim) {
2409         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2410         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2411      }
2412      params.derivs = &derivs;
2413      /*
2414       * We could also check all src regs for constants, but I doubt
2415       * such cases exist in practice.
2416       */
2417      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2418         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2419            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2420         }
2421         else {
2422            lod_property = LP_SAMPLER_LOD_PER_QUAD;
2423         }
2424      }
2425      else {
2426         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2427      }
2428   }
2429
2430   /* some advanced gather instructions (txgo) would require 4 offsets */
2431   if (inst->Texture.NumOffsets == 1) {
2432      unsigned dim;
2433      sample_key |= LP_SAMPLER_OFFSETS;
2434      for (dim = 0; dim < num_offsets; dim++) {
2435         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2436      }
2437   }
2438   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2439
2440   params.type = bld->bld_base.base.type;
2441   params.sample_key = sample_key;
2442   params.texture_index = texture_unit;
2443   params.sampler_index = sampler_unit;
2444   params.context_ptr = bld->context_ptr;
2445   params.thread_data_ptr = bld->thread_data_ptr;
2446   params.coords = coords;
2447   params.offsets = offsets;
2448   params.lod = lod;
2449   params.texel = texel;
2450
2451   bld->sampler->emit_tex_sample(bld->sampler,
2452                                 bld->bld_base.base.gallivm,
2453                                 &params);
2454
2455   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2456       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2457       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2458       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2459      unsigned char swizzles[4];
2460      swizzles[0] = inst->Src[1].Register.SwizzleX;
2461      swizzles[1] = inst->Src[1].Register.SwizzleY;
2462      swizzles[2] = inst->Src[1].Register.SwizzleZ;
2463      swizzles[3] = inst->Src[1].Register.SwizzleW;
2464
2465      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2466   }
2467}
2468
2469static void
2470emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2471                   const struct tgsi_full_instruction *inst,
2472                   LLVMValueRef *texel,
2473                   boolean is_samplei)
2474{
2475   unsigned unit, target;
2476   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2477   LLVMValueRef explicit_lod = NULL;
2478   LLVMValueRef coords[5];
2479   LLVMValueRef offsets[3] = { NULL };
2480   struct lp_sampler_params params;
2481   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2482   unsigned dims, i;
2483   unsigned layer_coord = 0;
2484   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2485
2486   memset(&params, 0, sizeof(params));
2487
2488   if (!bld->sampler) {
2489      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2490      for (i = 0; i < 4; i++) {
2491         texel[i] = coord_undef;
2492      }
2493      return;
2494   }
2495
2496   unit = inst->Src[1].Register.Index;
2497
2498   if (is_samplei) {
2499      target = bld->sv[unit].Resource;
2500   }
2501   else {
2502      target = inst->Texture.Texture;
2503   }
2504
2505   switch (target) {
2506   case TGSI_TEXTURE_1D:
2507   case TGSI_TEXTURE_BUFFER:
2508      dims = 1;
2509      break;
2510   case TGSI_TEXTURE_1D_ARRAY:
2511      layer_coord = 1;
2512      dims = 1;
2513      break;
2514   case TGSI_TEXTURE_2D:
2515   case TGSI_TEXTURE_RECT:
2516   case TGSI_TEXTURE_2D_MSAA:
2517      dims = 2;
2518      break;
2519   case TGSI_TEXTURE_2D_ARRAY:
2520   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2521      layer_coord = 2;
2522      dims = 2;
2523      break;
2524   case TGSI_TEXTURE_3D:
2525      dims = 3;
2526      break;
2527   default:
2528      assert(0);
2529      return;
2530   }
2531
2532   /* always have lod except for buffers and msaa targets ?
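    * (An illustrative note: TXF takes its lod from src0.w below; buffer
    * and MSAA resources have no mip levels, so no lod is fetched for
    * those.)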
    */
2533   if (target != TGSI_TEXTURE_BUFFER &&
2534       target != TGSI_TEXTURE_2D_MSAA &&
2535       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2536      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2537      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2538      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2539   }
2540   /*
2541    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2542    * would be the sample index.
2543    */
2544
2545   for (i = 0; i < dims; i++) {
2546      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2547   }
2548   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2549   for (i = dims; i < 5; i++) {
2550      coords[i] = coord_undef;
2551   }
2552   if (layer_coord)
2553      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2554
2555   if (inst->Texture.NumOffsets == 1) {
2556      unsigned dim;
2557      sample_key |= LP_SAMPLER_OFFSETS;
2558      for (dim = 0; dim < dims; dim++) {
2559         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2560      }
2561   }
2562   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2563
2564   params.type = bld->bld_base.base.type;
2565   params.sample_key = sample_key;
2566   params.texture_index = unit;
2567   /*
2568    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2569    * and trigger some assertions with d3d10 where the sampler view number
2570    * can exceed this.
2571    */
2572   params.sampler_index = 0;
2573   params.context_ptr = bld->context_ptr;
2574   params.thread_data_ptr = bld->thread_data_ptr;
2575   params.coords = coords;
2576   params.offsets = offsets;
2577   params.derivs = NULL;
2578   params.lod = explicit_lod;
2579   params.texel = texel;
2580
2581   bld->sampler->emit_tex_sample(bld->sampler,
2582                                 bld->bld_base.base.gallivm,
2583                                 &params);
2584
2585   if (is_samplei &&
2586       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2587        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2588        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2589        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2590      unsigned char swizzles[4];
2591      swizzles[0] = inst->Src[1].Register.SwizzleX;
2592      swizzles[1] = inst->Src[1].Register.SwizzleY;
2593      swizzles[2] = inst->Src[1].Register.SwizzleZ;
2594      swizzles[3] = inst->Src[1].Register.SwizzleW;
2595
2596      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2597   }
2598}
2599
2600static void
2601emit_size_query( struct lp_build_tgsi_soa_context *bld,
2602                 const struct tgsi_full_instruction *inst,
2603                 LLVMValueRef *sizes_out,
2604                 boolean is_sviewinfo)
2605{
2606   LLVMValueRef explicit_lod;
2607   enum lp_sampler_lod_property lod_property;
2608   unsigned has_lod;
2609   unsigned i;
2610   unsigned unit = inst->Src[1].Register.Index;
2611   unsigned target, pipe_target;
2612   struct lp_sampler_size_query_params params;
2613
2614   if (is_sviewinfo) {
2615      target = bld->sv[unit].Resource;
2616   }
2617   else {
2618      target = inst->Texture.Texture;
2619   }
2620   switch (target) {
2621   case TGSI_TEXTURE_BUFFER:
2622   case TGSI_TEXTURE_RECT:
2623   case TGSI_TEXTURE_SHADOWRECT:
2624      has_lod = 0;
2625      break;
2626   default:
2627      has_lod = 1;
2628      break;
2629   }
2630
2631   if (!bld->sampler) {
2632      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2633      for (i = 0; i < 4; i++)
2634         sizes_out[i] = bld->bld_base.int_bld.undef;
2635      return;
2636   }
2637
2638   if (has_lod) {
2639      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2640      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2641   }
2642   else {
2643      explicit_lod = NULL;
2644      lod_property = LP_SAMPLER_LOD_SCALAR;
2645   }
2646
2647
2648   pipe_target = tgsi_to_pipe_tex_target(target);
2649
2650   params.int_type = bld->bld_base.int_bld.type;
2651   params.texture_unit = unit;
2652   params.target = pipe_target;
2653   params.context_ptr = bld->context_ptr;
2654   params.is_sviewinfo = TRUE;
2655   params.lod_property = lod_property;
2656   params.explicit_lod = explicit_lod;
2657   params.sizes_out = sizes_out;
2658
2659   bld->sampler->emit_size_query(bld->sampler,
2660                                 bld->bld_base.base.gallivm,
2661                                 &params);
2662}
2663
2664static boolean
2665near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2666                   int pc)
2667{
2668   unsigned i;
2669
2670   for (i = 0; i < 5; i++) {
2671      enum tgsi_opcode opcode;
2672
2673      if (pc + i >= bld->bld_base.info->num_instructions)
2674         return TRUE;
2675
2676      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2677
2678      if (opcode == TGSI_OPCODE_END)
2679         return TRUE;
2680
2681      if (opcode == TGSI_OPCODE_TEX ||
2682          opcode == TGSI_OPCODE_TXP ||
2683          opcode == TGSI_OPCODE_TXD ||
2684          opcode == TGSI_OPCODE_TXB ||
2685          opcode == TGSI_OPCODE_TXL ||
2686          opcode == TGSI_OPCODE_TXF ||
2687          opcode == TGSI_OPCODE_TXQ ||
2688          opcode == TGSI_OPCODE_TEX2 ||
2689          opcode == TGSI_OPCODE_TXB2 ||
2690          opcode == TGSI_OPCODE_TXL2 ||
2691          opcode == TGSI_OPCODE_SAMPLE ||
2692          opcode == TGSI_OPCODE_SAMPLE_B ||
2693          opcode == TGSI_OPCODE_SAMPLE_C ||
2694          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2695          opcode == TGSI_OPCODE_SAMPLE_D ||
2696          opcode == TGSI_OPCODE_SAMPLE_I ||
2697          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2698          opcode == TGSI_OPCODE_SAMPLE_L ||
2699          opcode == TGSI_OPCODE_SVIEWINFO ||
2700          opcode == TGSI_OPCODE_CAL ||
2701          opcode == TGSI_OPCODE_IF ||
2702          opcode == TGSI_OPCODE_UIF ||
2703          opcode == TGSI_OPCODE_BGNLOOP ||
2704          opcode == TGSI_OPCODE_SWITCH)
2705         return FALSE;
2706   }
2707
2708   return TRUE;
2709}
2710
2711
2712
2713/**
2714 * Kill fragment if any of the src register values are negative.
2715 */
2716static void
2717emit_kill_if(
2718   struct lp_build_tgsi_soa_context *bld,
2719   const struct tgsi_full_instruction *inst,
2720   int pc)
2721{
2722   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2723   const struct tgsi_full_src_register *reg = &inst->Src[0];
2724   LLVMValueRef terms[TGSI_NUM_CHANNELS];
2725   LLVMValueRef mask;
2726   unsigned chan_index;
2727
2728   memset(&terms, 0, sizeof terms);
2729
2730   TGSI_FOR_EACH_CHANNEL( chan_index ) {
2731      unsigned swizzle;
2732
2733      /* Unswizzle channel */
2734      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2735
2736      /* Check if the component has not been already tested. */
2737      assert(swizzle < TGSI_NUM_CHANNELS);
2738      if( !terms[swizzle] )
2739         /* TODO: change the comparison operator instead of setting the sign */
2740         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2741   }
2742
2743   mask = NULL;
2744   TGSI_FOR_EACH_CHANNEL( chan_index ) {
2745      if(terms[chan_index]) {
2746         LLVMValueRef chan_mask;
2747
2748         /*
2749          * If term < 0 then mask = 0 else mask = ~0.
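          * I.e. chan_mask = (term >= 0) ? ~0 : 0 per channel, which is
          * what the PIPE_FUNC_GEQUAL compare below computes.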
2750 */ 2751 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); 2752 2753 if(mask) 2754 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 2755 else 2756 mask = chan_mask; 2757 } 2758 } 2759 2760 if (bld->exec_mask.has_mask) { 2761 LLVMValueRef invmask; 2762 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 2763 mask = LLVMBuildOr(builder, mask, invmask, ""); 2764 } 2765 2766 lp_build_mask_update(bld->mask, mask); 2767 if (!near_end_of_shader(bld, pc)) 2768 lp_build_mask_check(bld->mask); 2769} 2770 2771 2772/** 2773 * Unconditional fragment kill. 2774 * The only predication is the execution mask which will apply if 2775 * we're inside a loop or conditional. 2776 */ 2777static void 2778emit_kill(struct lp_build_tgsi_soa_context *bld, 2779 int pc) 2780{ 2781 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 2782 LLVMValueRef mask; 2783 2784 /* For those channels which are "alive", disable fragment shader 2785 * execution. 2786 */ 2787 if (bld->exec_mask.has_mask) { 2788 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 2789 } 2790 else { 2791 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type); 2792 mask = zero; 2793 } 2794 2795 lp_build_mask_update(bld->mask, mask); 2796 2797 if (!near_end_of_shader(bld, pc)) 2798 lp_build_mask_check(bld->mask); 2799} 2800 2801 2802/** 2803 * Emit code which will dump the value of all the temporary registers 2804 * to stdout. 2805 */ 2806static void 2807emit_dump_file(struct lp_build_tgsi_soa_context *bld, 2808 unsigned file) 2809{ 2810 const struct tgsi_shader_info *info = bld->bld_base.info; 2811 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 2812 LLVMBuilderRef builder = gallivm->builder; 2813 LLVMValueRef reg_ptr; 2814 int index; 2815 int max_index = info->file_max[file]; 2816 2817 /* 2818 * Some register files, particularly constants, can be very large, 2819 * and dumping everything could make this unusably slow. 
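 * Hence the dump below is capped to the first 32 registers.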
2820    */
2821   max_index = MIN2(max_index, 32);
2822
2823   for (index = 0; index <= max_index; index++) {
2824      LLVMValueRef res;
2825      unsigned mask;
2826      int chan;
2827
2828      if (index < 8 * sizeof(unsigned) &&
2829          (info->file_mask[file] & (1u << index)) == 0) {
2830         /* This was not declared. */
2831         continue;
2832      }
2833
2834      if (file == TGSI_FILE_INPUT) {
2835         mask = info->input_usage_mask[index];
2836      } else {
2837         mask = TGSI_WRITEMASK_XYZW;
2838      }
2839
2840      for (chan = 0; chan < 4; chan++) {
2841         if ((mask & (1 << chan)) == 0) {
2842            /* This channel is not used. */
2843            continue;
2844         }
2845
2846         if (file == TGSI_FILE_CONSTANT) {
2847            struct tgsi_full_src_register reg;
2848            memset(&reg, 0, sizeof reg);
2849            reg.Register.File = file;
2850            reg.Register.Index = index;
2851            reg.Register.SwizzleX = 0;
2852            reg.Register.SwizzleY = 1;
2853            reg.Register.SwizzleZ = 2;
2854            reg.Register.SwizzleW = 3;
2855
2856            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2857            if (!res) {
2858               continue;
2859            }
2860         } else if (file == TGSI_FILE_INPUT) {
2861            res = bld->inputs[index][chan];
2862            if (!res) {
2863               continue;
2864            }
2865         } else if (file == TGSI_FILE_TEMPORARY) {
2866            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2867            assert(reg_ptr);
2868            res = LLVMBuildLoad(builder, reg_ptr, "");
2869         } else if (file == TGSI_FILE_OUTPUT) {
2870            reg_ptr = lp_get_output_ptr(bld, index, chan);
2871            assert(reg_ptr);
2872            res = LLVMBuildLoad(builder, reg_ptr, "");
2873         } else {
2874            assert(0);
2875            continue;
2876         }
2877
2878         emit_dump_reg(gallivm, file, index, chan, res);
2879      }
2880   }
2881}
2882
2883
2884
2885void
2886lp_emit_declaration_soa(
2887   struct lp_build_tgsi_context *bld_base,
2888   const struct tgsi_full_declaration *decl)
2889{
2890   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2891   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2892   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2893   const unsigned first = decl->Range.First;
2894   const unsigned last = decl->Range.Last;
2895   unsigned idx, i;
2896
2897   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2898
2899   switch (decl->Declaration.File) {
2900   case TGSI_FILE_TEMPORARY:
2901      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2902         assert(last < LP_MAX_INLINED_TEMPS);
2903         for (idx = first; idx <= last; ++idx) {
2904            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2905               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2906         }
2907      }
2908      break;
2909
2910   case TGSI_FILE_OUTPUT:
2911      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2912         for (idx = first; idx <= last; ++idx) {
2913            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2914               bld->outputs[idx][i] = lp_build_alloca(gallivm,
2915                                                      vec_type, "output");
2916         }
2917      }
2918      break;
2919
2920   case TGSI_FILE_ADDRESS:
2921      /* ADDR registers are only allocated with an integer LLVM IR type,
2922       * as they are guaranteed to always hold integer values.
2923       * XXX: Not sure if this exception is worthwhile (or the whole idea of
2924       * an ADDR register for that matter).
2925       */
2926      assert(last < LP_MAX_TGSI_ADDRS);
2927      for (idx = first; idx <= last; ++idx) {
2928         assert(idx < LP_MAX_TGSI_ADDRS);
2929         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2930            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2931      }
2932      break;
2933
2934   case TGSI_FILE_SAMPLER_VIEW:
2935      /*
2936       * The target stored here MUST match whatever there actually
2937       * is in the set sampler views (what about return type?).
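       * The targets recorded here are the ones that emit_sample(),
       * emit_fetch_texels() and emit_size_query() above consult
       * through bld->sv[].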
2938 */ 2939 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); 2940 for (idx = first; idx <= last; ++idx) { 2941 bld->sv[idx] = decl->SamplerView; 2942 } 2943 break; 2944 2945 case TGSI_FILE_CONSTANT: 2946 { 2947 /* 2948 * We could trivially fetch the per-buffer pointer when fetching the 2949 * constant, relying on llvm to figure out it's always the same pointer 2950 * anyway. However, doing so results in a huge (more than factor of 10) 2951 * slowdown in llvm compilation times for some (but not all) shaders 2952 * (more specifically, the IR optimization spends way more time in 2953 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3. 2954 */ 2955 unsigned idx2D = decl->Dim.Index2D; 2956 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D); 2957 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS); 2958 bld->consts[idx2D] = 2959 lp_build_array_get(gallivm, bld->consts_ptr, index2D); 2960 bld->consts_sizes[idx2D] = 2961 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D); 2962 } 2963 break; 2964 2965 default: 2966 /* don't need to declare other vars */ 2967 break; 2968 } 2969} 2970 2971 2972void lp_emit_immediate_soa( 2973 struct lp_build_tgsi_context *bld_base, 2974 const struct tgsi_full_immediate *imm) 2975{ 2976 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 2977 struct gallivm_state * gallivm = bld_base->base.gallivm; 2978 LLVMValueRef imms[4]; 2979 unsigned i; 2980 const uint size = imm->Immediate.NrTokens - 1; 2981 assert(size <= 4); 2982 switch (imm->Immediate.DataType) { 2983 case TGSI_IMM_FLOAT32: 2984 for( i = 0; i < size; ++i ) 2985 imms[i] = 2986 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); 2987 2988 break; 2989 case TGSI_IMM_FLOAT64: 2990 case TGSI_IMM_UINT64: 2991 case TGSI_IMM_INT64: 2992 case TGSI_IMM_UINT32: 2993 for( i = 0; i < size; ++i ) { 2994 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); 2995 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); 2996 } 2997 2998 break; 2999 case TGSI_IMM_INT32: 3000 for( i = 0; i < size; ++i ) { 3001 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int); 3002 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); 3003 } 3004 3005 break; 3006 } 3007 for( i = size; i < 4; ++i ) 3008 imms[i] = bld_base->base.undef; 3009 3010 if (bld->use_immediates_array) { 3011 unsigned index = bld->num_immediates; 3012 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 3013 LLVMBuilderRef builder = gallivm->builder; 3014 LLVMValueRef gep[2]; 3015 gep[0] = lp_build_const_int32(gallivm, 0); 3016 3017 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)); 3018 for (i = 0; i < 4; ++i ) { 3019 gep[1] = lp_build_const_int32(gallivm, index * 4 + i); 3020 LLVMValueRef imm_ptr = LLVMBuildGEP(builder, 3021 bld->imms_array, gep, 2, ""); 3022 LLVMBuildStore(builder, imms[i], imm_ptr); 3023 } 3024 } else { 3025 /* simply copy the immediate values into the next immediates[] slot */ 3026 unsigned i; 3027 assert(imm->Immediate.NrTokens - 1 <= 4); 3028 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES); 3029 3030 for(i = 0; i < 4; ++i ) 3031 bld->immediates[bld->num_immediates][i] = imms[i]; 3032 3033 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) { 3034 unsigned index = bld->num_immediates; 3035 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 3036 LLVMBuilderRef builder = gallivm->builder; 3037 LLVMValueRef gep[2]; 3038 gep[0] = lp_build_const_int32(gallivm, 0); 3039 for (i = 0; i < 4; ++i ) { 3040 
gep[1] = lp_build_const_int32(gallivm, index * 4 + i); 3041 LLVMValueRef imm_ptr = LLVMBuildGEP(builder, 3042 bld->imms_array, gep, 2, ""); 3043 LLVMBuildStore(builder, 3044 bld->immediates[index][i], 3045 imm_ptr); 3046 } 3047 } 3048 } 3049 3050 bld->num_immediates++; 3051} 3052 3053static void 3054ddx_emit( 3055 const struct lp_build_tgsi_action * action, 3056 struct lp_build_tgsi_context * bld_base, 3057 struct lp_build_emit_data * emit_data) 3058{ 3059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3060 3061 emit_fetch_deriv(bld, emit_data->args[0], NULL, 3062 &emit_data->output[emit_data->chan], NULL); 3063} 3064 3065static void 3066ddy_emit( 3067 const struct lp_build_tgsi_action * action, 3068 struct lp_build_tgsi_context * bld_base, 3069 struct lp_build_emit_data * emit_data) 3070{ 3071 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3072 3073 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL, 3074 &emit_data->output[emit_data->chan]); 3075} 3076 3077static void 3078kill_emit( 3079 const struct lp_build_tgsi_action * action, 3080 struct lp_build_tgsi_context * bld_base, 3081 struct lp_build_emit_data * emit_data) 3082{ 3083 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3084 3085 emit_kill(bld, bld_base->pc - 1); 3086} 3087 3088static void 3089kill_if_emit( 3090 const struct lp_build_tgsi_action * action, 3091 struct lp_build_tgsi_context * bld_base, 3092 struct lp_build_emit_data * emit_data) 3093{ 3094 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3095 3096 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1); 3097} 3098 3099static void 3100tex_emit( 3101 const struct lp_build_tgsi_action * action, 3102 struct lp_build_tgsi_context * bld_base, 3103 struct lp_build_emit_data * emit_data) 3104{ 3105 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3106 3107 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3108 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3109} 3110 3111static void 3112tex2_emit( 3113 const struct lp_build_tgsi_action * action, 3114 struct lp_build_tgsi_context * bld_base, 3115 struct lp_build_emit_data * emit_data) 3116{ 3117 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3118 3119 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3120 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); 3121} 3122 3123static void 3124txb_emit( 3125 const struct lp_build_tgsi_action * action, 3126 struct lp_build_tgsi_context * bld_base, 3127 struct lp_build_emit_data * emit_data) 3128{ 3129 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3130 3131 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 3132 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3133} 3134 3135static void 3136txb2_emit( 3137 const struct lp_build_tgsi_action * action, 3138 struct lp_build_tgsi_context * bld_base, 3139 struct lp_build_emit_data * emit_data) 3140{ 3141 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3142 3143 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 3144 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); 3145} 3146 3147static void 3148txd_emit( 3149 const struct lp_build_tgsi_action * action, 3150 struct lp_build_tgsi_context * bld_base, 3151 struct lp_build_emit_data * emit_data) 3152{ 3153 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3154 3155 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 3156 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE); 3157} 3158 3159static 
void 3160txl_emit( 3161 const struct lp_build_tgsi_action * action, 3162 struct lp_build_tgsi_context * bld_base, 3163 struct lp_build_emit_data * emit_data) 3164{ 3165 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3166 3167 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 3168 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3169} 3170 3171static void 3172txl2_emit( 3173 const struct lp_build_tgsi_action * action, 3174 struct lp_build_tgsi_context * bld_base, 3175 struct lp_build_emit_data * emit_data) 3176{ 3177 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3178 3179 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 3180 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); 3181} 3182 3183static void 3184txp_emit( 3185 const struct lp_build_tgsi_action * action, 3186 struct lp_build_tgsi_context * bld_base, 3187 struct lp_build_emit_data * emit_data) 3188{ 3189 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3190 3191 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED, 3192 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3193} 3194 3195static void 3196tg4_emit( 3197 const struct lp_build_tgsi_action * action, 3198 struct lp_build_tgsi_context * bld_base, 3199 struct lp_build_emit_data * emit_data) 3200{ 3201 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3202 3203 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3204 emit_data->output, 2, LP_SAMPLER_OP_GATHER); 3205} 3206 3207static void 3208lodq_emit( 3209 const struct lp_build_tgsi_action * action, 3210 struct lp_build_tgsi_context * bld_base, 3211 struct lp_build_emit_data * emit_data) 3212{ 3213 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3214 3215 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3216 emit_data->output, 1, LP_SAMPLER_OP_LODQ); 3217} 3218 3219static void 3220txq_emit( 3221 const struct lp_build_tgsi_action * action, 3222 struct lp_build_tgsi_context * bld_base, 3223 struct lp_build_emit_data * emit_data) 3224{ 3225 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3226 3227 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE); 3228} 3229 3230static void 3231txf_emit( 3232 const struct lp_build_tgsi_action * action, 3233 struct lp_build_tgsi_context * bld_base, 3234 struct lp_build_emit_data * emit_data) 3235{ 3236 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3237 3238 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE); 3239} 3240 3241static void 3242sample_i_emit( 3243 const struct lp_build_tgsi_action * action, 3244 struct lp_build_tgsi_context * bld_base, 3245 struct lp_build_emit_data * emit_data) 3246{ 3247 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3248 3249 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE); 3250} 3251 3252static void 3253sample_emit( 3254 const struct lp_build_tgsi_action * action, 3255 struct lp_build_tgsi_context * bld_base, 3256 struct lp_build_emit_data * emit_data) 3257{ 3258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3259 3260 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3261 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3262} 3263 3264static void 3265sample_b_emit( 3266 const struct lp_build_tgsi_action * action, 3267 struct lp_build_tgsi_context * bld_base, 3268 struct lp_build_emit_data * emit_data) 3269{ 3270 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3271 3272 
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 3273 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3274} 3275 3276static void 3277sample_c_emit( 3278 const struct lp_build_tgsi_action * action, 3279 struct lp_build_tgsi_context * bld_base, 3280 struct lp_build_emit_data * emit_data) 3281{ 3282 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3283 3284 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3285 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3286} 3287 3288static void 3289sample_c_lz_emit( 3290 const struct lp_build_tgsi_action * action, 3291 struct lp_build_tgsi_context * bld_base, 3292 struct lp_build_emit_data * emit_data) 3293{ 3294 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3295 3296 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO, 3297 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3298} 3299 3300static void 3301sample_d_emit( 3302 const struct lp_build_tgsi_action * action, 3303 struct lp_build_tgsi_context * bld_base, 3304 struct lp_build_emit_data * emit_data) 3305{ 3306 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3307 3308 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 3309 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3310} 3311 3312static void 3313sample_l_emit( 3314 const struct lp_build_tgsi_action * action, 3315 struct lp_build_tgsi_context * bld_base, 3316 struct lp_build_emit_data * emit_data) 3317{ 3318 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3319 3320 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 3321 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3322} 3323 3324static void 3325gather4_emit( 3326 const struct lp_build_tgsi_action * action, 3327 struct lp_build_tgsi_context * bld_base, 3328 struct lp_build_emit_data * emit_data) 3329{ 3330 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3331 3332 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3333 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output); 3334} 3335 3336static void 3337sviewinfo_emit( 3338 const struct lp_build_tgsi_action * action, 3339 struct lp_build_tgsi_context * bld_base, 3340 struct lp_build_emit_data * emit_data) 3341{ 3342 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3343 3344 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); 3345} 3346 3347static void 3348lod_emit( 3349 const struct lp_build_tgsi_action * action, 3350 struct lp_build_tgsi_context * bld_base, 3351 struct lp_build_emit_data * emit_data) 3352{ 3353 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3354 3355 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3356 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output); 3357} 3358 3359static LLVMValueRef 3360mask_vec(struct lp_build_tgsi_context *bld_base) 3361{ 3362 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3363 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 3364 struct lp_exec_mask *exec_mask = &bld->exec_mask; 3365 3366 if (!exec_mask->has_mask) { 3367 return lp_build_mask_value(bld->mask); 3368 } 3369 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask), 3370 exec_mask->exec_mask, ""); 3371} 3372 3373static void 3374increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base, 3375 LLVMValueRef ptr, 3376 LLVMValueRef mask) 3377{ 3378 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 3379 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); 
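   /*
    * Note (added commentary): the active lanes of an exec mask are all
    * ones, i.e. ~0 == -1 per lane, so the subtraction below increments
    * the counter in exactly the active lanes.
    */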
3380
3381   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3382
3383   LLVMBuildStore(builder, current_vec, ptr);
3384}
3385
3386static void
3387clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3388                             LLVMValueRef ptr,
3389                             LLVMValueRef mask)
3390{
3391   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3392   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3393
3394   current_vec = lp_build_select(&bld_base->uint_bld,
3395                                 mask,
3396                                 bld_base->uint_bld.zero,
3397                                 current_vec);
3398
3399   LLVMBuildStore(builder, current_vec, ptr);
3400}
3401
3402static LLVMValueRef
3403clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3404                                  LLVMValueRef current_mask_vec,
3405                                  LLVMValueRef total_emitted_vertices_vec)
3406{
3407   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3408   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3409   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3410                                        total_emitted_vertices_vec,
3411                                        bld->max_output_vertices_vec);
3412
3413   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3414}
3415
3416static void
3417emit_vertex(
3418   const struct lp_build_tgsi_action * action,
3419   struct lp_build_tgsi_context * bld_base,
3420   struct lp_build_emit_data * emit_data)
3421{
3422   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3423   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3424
3425   if (bld->gs_iface->emit_vertex) {
3426      LLVMValueRef mask = mask_vec(bld_base);
3427      LLVMValueRef total_emitted_vertices_vec =
3428         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3429      mask = clamp_mask_to_max_output_vertices(bld, mask,
3430                                               total_emitted_vertices_vec);
3431      gather_outputs(bld);
3432      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3433                                 bld->outputs,
3434                                 total_emitted_vertices_vec);
3435      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3436                                mask);
3437      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3438                                mask);
3439#if DUMP_GS_EMITS
3440      lp_build_print_value(bld->bld_base.base.gallivm,
3441                           " +++ emit vertex masked ones = ",
3442                           mask);
3443      lp_build_print_value(bld->bld_base.base.gallivm,
3444                           " +++ emit vertex emitted = ",
3445                           total_emitted_vertices_vec);
3446#endif
3447   }
3448}
3449
3450
3451static void
3452end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3453                     LLVMValueRef mask)
3454{
3455   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3456   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3457
3458   if (bld->gs_iface->end_primitive) {
3459      struct lp_build_context *uint_bld = &bld_base->uint_bld;
3460      LLVMValueRef emitted_vertices_vec =
3461         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3462      LLVMValueRef emitted_prims_vec =
3463         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3464
3465      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3466                                               emitted_vertices_vec,
3467                                               uint_bld->zero);
3468      /* We need to combine the current execution mask with the mask
3469         telling us which, if any, execution slots actually have
3470         unemitted primitives; this way we make sure that end_primitive
3471         executes only on the paths that have unflushed vertices. */
3472      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3473
3474      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3475                                   emitted_vertices_vec,
3476                                   emitted_prims_vec);
3477
3478#if DUMP_GS_EMITS
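      /*
       * (Added note: DUMP_GS_EMITS is the debug switch defined near the
       * top of this file; when set to 1, these prints trace every vertex
       * and primitive emit at shader run time.)
       */
3479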
lp_build_print_value(bld->bld_base.base.gallivm, 3480 " +++ end prim masked ones = ", 3481 mask); 3482 lp_build_print_value(bld->bld_base.base.gallivm, 3483 " +++ end prim emitted verts1 = ", 3484 emitted_vertices_vec); 3485 lp_build_print_value(bld->bld_base.base.gallivm, 3486 " +++ end prim emitted prims1 = ", 3487 LLVMBuildLoad(builder, 3488 bld->emitted_prims_vec_ptr, "")); 3489#endif 3490 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr, 3491 mask); 3492 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr, 3493 mask); 3494#if DUMP_GS_EMITS 3495 lp_build_print_value(bld->bld_base.base.gallivm, 3496 " +++ end prim emitted verts2 = ", 3497 LLVMBuildLoad(builder, 3498 bld->emitted_vertices_vec_ptr, "")); 3499#endif 3500 } 3501 3502} 3503 3504static void 3505end_primitive( 3506 const struct lp_build_tgsi_action * action, 3507 struct lp_build_tgsi_context * bld_base, 3508 struct lp_build_emit_data * emit_data) 3509{ 3510 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3511 3512 if (bld->gs_iface->end_primitive) { 3513 LLVMValueRef mask = mask_vec(bld_base); 3514 end_primitive_masked(bld_base, mask); 3515 } 3516} 3517 3518static void 3519cal_emit( 3520 const struct lp_build_tgsi_action * action, 3521 struct lp_build_tgsi_context * bld_base, 3522 struct lp_build_emit_data * emit_data) 3523{ 3524 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3525 3526 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label, 3527 &bld_base->pc); 3528} 3529 3530static void 3531ret_emit( 3532 const struct lp_build_tgsi_action * action, 3533 struct lp_build_tgsi_context * bld_base, 3534 struct lp_build_emit_data * emit_data) 3535{ 3536 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3537 3538 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); 3539} 3540 3541static void 3542brk_emit( 3543 const struct lp_build_tgsi_action * action, 3544 struct lp_build_tgsi_context * bld_base, 3545 struct lp_build_emit_data * emit_data) 3546{ 3547 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3548 3549 lp_exec_break(&bld->exec_mask, bld_base); 3550} 3551 3552static void 3553if_emit( 3554 const struct lp_build_tgsi_action * action, 3555 struct lp_build_tgsi_context * bld_base, 3556 struct lp_build_emit_data * emit_data) 3557{ 3558 LLVMValueRef tmp; 3559 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3560 3561 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL, 3562 emit_data->args[0], bld->bld_base.base.zero); 3563 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 3564} 3565 3566static void 3567uif_emit( 3568 const struct lp_build_tgsi_action * action, 3569 struct lp_build_tgsi_context * bld_base, 3570 struct lp_build_emit_data * emit_data) 3571{ 3572 LLVMValueRef tmp; 3573 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3574 struct lp_build_context *uint_bld = &bld_base->uint_bld; 3575 3576 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, 3577 emit_data->args[0], uint_bld->zero); 3578 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 3579} 3580 3581static void 3582case_emit( 3583 const struct lp_build_tgsi_action * action, 3584 struct lp_build_tgsi_context * bld_base, 3585 struct lp_build_emit_data * emit_data) 3586{ 3587 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3588 3589 lp_exec_case(&bld->exec_mask, emit_data->args[0]); 3590} 3591 3592static void 3593default_emit( 3594 const struct lp_build_tgsi_action * action, 3595 struct lp_build_tgsi_context * 
static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}
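
/*
 * Illustration (comment only): in emit_prologue() below, register files
 * that are addressed indirectly get backed by an alloca'd array of
 * file_max*4 + 4 vectors.  file_max is the highest register index used,
 * so the register count is file_max + 1, and with TGSI_NUM_CHANNELS == 4
 * channels per register:
 *
 *    (file_max + 1) * 4  ==  file_max * 4 + 4
 *
 * e.g. a shader whose highest TEMP index is 7 gets an array of 32 vectors.
 */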
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
                                                     bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
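
/*
 * Illustration (comment only): for geometry shaders the prologue above
 * allocates three per-lane counters, which EMIT/ENDPRIM maintain as:
 *
 *    EMIT:    emitted_vertices += 1;  total_emitted_vertices += 1;
 *    ENDPRIM: emitted_prims    += 1;  emitted_vertices        = 0;
 *
 * (each per active lane, and ENDPRIM only touches lanes whose
 * emitted_vertices is non-zero).  The epilogue below flushes any
 * primitive still left open when the shader ends.
 */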
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache.  Note we must not call end_primitive here,
       * since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* If we have indirect addressing in outputs, copy our alloca array
       * to the output slots specified by the caller. */
      gather_outputs(bld);
   }
}

void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  const struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
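
   /*
    * Illustration (comment only): indirect_files is a bitmask with one
    * bit per TGSI register file, so forcing a file into array form is
    * a single bit set, as above and for immediates just below:
    *
    *    bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
    *
    * emit_prologue() then backs every flagged file with an alloca'd
    * array instead of individual per-channel values.
    */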
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if there are too many of them we fall back to a
    * dynamically allocated array.
    */
   bld.use_immediates_array =
      (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);
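
   /*
    * Illustration (comment only): lp_set_default_actions_cpu() fills the
    * op_actions table with generic implementations; the assignments below
    * then override the entries where the SoA path needs its own codegen.
    * Overriding an opcode is a single table store, e.g.:
    *
    *    bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
    */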
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;


   if (gs_iface) {
      /* There is no default value for this: it should always be set by
       * the state tracker, but apps using ext_geometry_shader4 quite
       * often forgot to set it, so we fall back to MAX_VERTEX_VARYING
       * from that spec rather than asserting, which would be uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32; /* MAX_VERTEX_VARYING */

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}
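
/*
 * Usage sketch (comment only; the caller-side names here are
 * hypothetical): a driver that has already created its function, fetched
 * its inputs and set up a lp_build_mask_context would translate a
 * non-geometry shader roughly as:
 *
 *    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
 *                      consts_ptr, const_sizes_ptr,
 *                      &system_values, inputs, outputs,
 *                      context_ptr, thread_data_ptr,
 *                      sampler, &shader_info,
 *                      NULL);   // gs_iface is NULL for non-GS shaders
 *
 * The shader body is appended at the builder's current position; the
 * caller then reads the results back through the outputs array (for
 * geometry shaders the values are instead handed to gs_iface).
 */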