1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 VMware, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "util/u_prim.h" 45#include "tgsi/tgsi_dump.h" 46#include "tgsi/tgsi_exec.h" 47#include "tgsi/tgsi_info.h" 48#include "tgsi/tgsi_parse.h" 49#include "tgsi/tgsi_util.h" 50#include "tgsi/tgsi_scan.h" 51#include "tgsi/tgsi_strings.h" 52#include "lp_bld_tgsi_action.h" 53#include "lp_bld_type.h" 54#include "lp_bld_const.h" 55#include "lp_bld_arit.h" 56#include "lp_bld_bitarit.h" 57#include "lp_bld_gather.h" 58#include "lp_bld_init.h" 59#include "lp_bld_logic.h" 60#include "lp_bld_misc.h" 61#include "lp_bld_swizzle.h" 62#include "lp_bld_flow.h" 63#include "lp_bld_coro.h" 64#include "lp_bld_quad.h" 65#include "lp_bld_tgsi.h" 66#include "lp_bld_limits.h" 67#include "lp_bld_debug.h" 68#include "lp_bld_printf.h" 69#include "lp_bld_sample.h" 70#include "lp_bld_struct.h" 71 72#define DUMP_GS_EMITS 0 73 74/* 75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI 76 * instruction. 77 * 78 * TODO: 79 * - take execution masks in consideration 80 * - debug control-flow instructions 81 */ 82#define DEBUG_EXECUTION 0 83 84 85/* 86 * Emit code to print a register value. 87 */ 88static void 89emit_dump_reg(struct gallivm_state *gallivm, 90 unsigned file, 91 unsigned index, 92 unsigned chan, 93 LLVMValueRef value) 94{ 95 char buf[32]; 96 97 snprintf(buf, sizeof buf, " %s[%u].%c = ", 98 tgsi_file_name(file), 99 index, "xyzw"[chan]); 100 101 lp_build_print_value(gallivm, buf, value); 102} 103 104static inline struct function_ctx * 105func_ctx(struct lp_exec_mask *mask) 106{ 107 assert(mask->function_stack_size > 0); 108 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS); 109 return &mask->function_stack[mask->function_stack_size - 1]; 110} 111 112/* 113 * combine the execution mask if there is one with the current mask. 114 */ 115static LLVMValueRef 116mask_vec(struct lp_build_tgsi_context *bld_base) 117{ 118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 120 struct lp_exec_mask *exec_mask = &bld->exec_mask; 121 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL; 122 if (!exec_mask->has_mask) { 123 return bld_mask; 124 } 125 if (!bld_mask) 126 return exec_mask->exec_mask; 127 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask), 128 exec_mask->exec_mask, ""); 129} 130 131static void lp_exec_tgsi_break(struct lp_exec_mask *mask, 132 struct lp_build_tgsi_context * bld_base) 133{ 134 enum tgsi_opcode opcode = 135 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode; 136 bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH || 137 opcode == TGSI_OPCODE_CASE); 138 lp_exec_break(mask, &bld_base->pc, break_always); 139} 140 141static void lp_exec_switch(struct lp_exec_mask *mask, 142 LLVMValueRef switchval) 143{ 144 struct function_ctx *ctx = func_ctx(mask); 145 146 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING || 147 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) { 148 ctx->switch_stack_size++; 149 return; 150 } 151 152 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] = 153 ctx->break_type; 154 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH; 155 156 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask; 157 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val; 158 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default; 159 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default; 160 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc; 161 ctx->switch_stack_size++; 162 163 mask->switch_mask = LLVMConstNull(mask->int_vec_type); 164 ctx->switch_val = switchval; 165 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type); 166 ctx->switch_in_default = false; 167 ctx->switch_pc = 0; 168 169 lp_exec_mask_update(mask); 170} 171 172static void lp_exec_endswitch(struct lp_exec_mask *mask, 173 struct lp_build_tgsi_context * bld_base) 174{ 175 LLVMBuilderRef builder = mask->bld->gallivm->builder; 176 struct function_ctx *ctx = func_ctx(mask); 177 178 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { 179 ctx->switch_stack_size--; 180 return; 181 } 182 183 /* check if there's deferred default if so do it now */ 184 if (ctx->switch_pc && !ctx->switch_in_default) { 185 LLVMValueRef prevmask, defaultmask; 186 unsigned tmp_pc; 187 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask; 188 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask"); 189 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask"); 190 ctx->switch_in_default = true; 191 192 lp_exec_mask_update(mask); 193 194 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode == 195 TGSI_OPCODE_DEFAULT); 196 197 tmp_pc = bld_base->pc; 198 bld_base->pc = ctx->switch_pc; 199 /* 200 * re-purpose switch_pc to point to here again, since we stop execution of 201 * the deferred default after next break. 202 */ 203 ctx->switch_pc = tmp_pc - 1; 204 205 return; 206 } 207 208 else if (ctx->switch_pc && ctx->switch_in_default) { 209 assert(bld_base->pc == ctx->switch_pc + 1); 210 } 211 212 ctx->switch_stack_size--; 213 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask; 214 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val; 215 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default; 216 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default; 217 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc; 218 219 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size]; 220 221 lp_exec_mask_update(mask); 222} 223 224static void lp_exec_case(struct lp_exec_mask *mask, 225 LLVMValueRef caseval) 226{ 227 LLVMBuilderRef builder = mask->bld->gallivm->builder; 228 struct function_ctx *ctx = func_ctx(mask); 229 230 LLVMValueRef casemask, prevmask; 231 232 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { 233 return; 234 } 235 236 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */ 237 if (!ctx->switch_in_default) { 238 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask; 239 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val); 240 ctx->switch_mask_default = LLVMBuildOr(builder, casemask, 241 ctx->switch_mask_default, "sw_default_mask"); 242 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, ""); 243 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask"); 244 245 lp_exec_mask_update(mask); 246 } 247} 248 249/* 250 * Analyse default statement in a switch. 251 * \return true if default is last statement, false otherwise 252 * \param default_pc_start contains pc of instruction to jump to 253 * if default wasn't last but there's no 254 * fallthrough into default. 255 */ 256static boolean default_analyse_is_last(struct lp_exec_mask *mask, 257 struct lp_build_tgsi_context * bld_base, 258 int *default_pc_start) 259{ 260 unsigned pc = bld_base->pc; 261 struct function_ctx *ctx = func_ctx(mask); 262 int curr_switch_stack = ctx->switch_stack_size; 263 264 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { 265 return false; 266 } 267 268 /* skip over case statements which are together with default */ 269 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) { 270 pc++; 271 } 272 273 while (pc != ~0u && pc < bld_base->num_instructions) { 274 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode; 275 switch (opcode) { 276 case TGSI_OPCODE_CASE: 277 if (curr_switch_stack == ctx->switch_stack_size) { 278 *default_pc_start = pc - 1; 279 return false; 280 } 281 break; 282 case TGSI_OPCODE_SWITCH: 283 curr_switch_stack++; 284 break; 285 case TGSI_OPCODE_ENDSWITCH: 286 if (curr_switch_stack == ctx->switch_stack_size) { 287 *default_pc_start = pc - 1; 288 return true; 289 } 290 curr_switch_stack--; 291 break; 292 default: 293 ; /* nothing */ 294 } 295 pc++; 296 } 297 /* should never arrive here */ 298 assert(0); 299 return true; 300} 301 302static void lp_exec_default(struct lp_exec_mask *mask, 303 struct lp_build_tgsi_context * bld_base) 304{ 305 LLVMBuilderRef builder = mask->bld->gallivm->builder; 306 struct function_ctx *ctx = func_ctx(mask); 307 308 int default_exec_pc = 0; 309 boolean default_is_last; 310 311 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { 312 return; 313 } 314 315 /* 316 * This is a messy opcode, because it may not be always at the end and 317 * there can be fallthrough in and out of it. 318 */ 319 320 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc); 321 /* 322 * If it is last statement in switch (note that case statements appearing 323 * "at the same time" as default don't change that) everything is just fine, 324 * update switch mask and go on. This means we can handle default with 325 * fallthrough INTO it without overhead, if it is last. 326 */ 327 if (default_is_last) { 328 LLVMValueRef prevmask, defaultmask; 329 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask; 330 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask"); 331 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, ""); 332 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask"); 333 ctx->switch_in_default = true; 334 335 lp_exec_mask_update(mask); 336 } 337 else { 338 /* 339 * Technically, "case" immediately before default isn't really a 340 * fallthrough, however we still have to count them as such as we 341 * already have updated the masks. 342 * If that happens in practice could add a switch optimizer pass 343 * which just gets rid of all case statements appearing together with 344 * default (or could do switch analysis at switch start time instead). 345 */ 346 enum tgsi_opcode opcode = 347 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode; 348 boolean ft_into = (opcode != TGSI_OPCODE_BRK && 349 opcode != TGSI_OPCODE_SWITCH); 350 /* 351 * If it is not last statement and there was no fallthrough into it, 352 * we record the PC and continue execution at next case (again, those 353 * case encountered at the same time don't count). At endswitch 354 * time, we update switchmask, and go back executing the code we skipped 355 * until the next break (possibly re-executing some code with changed mask 356 * if there was a fallthrough out of default). 357 * Finally, if it is not last statement and there was a fallthrough into it, 358 * do the same as with the former case, except instead of skipping the code 359 * just execute it without updating the mask, then go back and re-execute. 360 */ 361 ctx->switch_pc = bld_base->pc; 362 if (!ft_into) { 363 bld_base->pc = default_exec_pc; 364 } 365 } 366} 367 368 369static void lp_exec_mask_call(struct lp_exec_mask *mask, 370 int func, 371 int *pc) 372{ 373 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) { 374 return; 375 } 376 377 lp_exec_mask_function_init(mask, mask->function_stack_size); 378 mask->function_stack[mask->function_stack_size].pc = *pc; 379 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask; 380 mask->function_stack_size++; 381 *pc = func; 382} 383 384static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 385{ 386 LLVMBuilderRef builder = mask->bld->gallivm->builder; 387 struct function_ctx *ctx = func_ctx(mask); 388 LLVMValueRef exec_mask; 389 390 if (ctx->cond_stack_size == 0 && 391 ctx->loop_stack_size == 0 && 392 ctx->switch_stack_size == 0 && 393 mask->function_stack_size == 1) { 394 /* returning from main() */ 395 *pc = -1; 396 return; 397 } 398 399 if (mask->function_stack_size == 1) { 400 /* 401 * This requires special handling since we need to ensure 402 * we don't drop the mask even if we have no call stack 403 * (e.g. after a ret in a if clause after the endif) 404 */ 405 mask->ret_in_main = TRUE; 406 } 407 408 exec_mask = LLVMBuildNot(builder, 409 mask->exec_mask, 410 "ret"); 411 412 mask->ret_mask = LLVMBuildAnd(builder, 413 mask->ret_mask, 414 exec_mask, "ret_full"); 415 416 lp_exec_mask_update(mask); 417} 418 419static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 420{ 421} 422 423static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 424{ 425 struct function_ctx *ctx; 426 427 assert(mask->function_stack_size > 1); 428 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS); 429 430 ctx = func_ctx(mask); 431 mask->function_stack_size--; 432 433 *pc = ctx->pc; 434 mask->ret_mask = ctx->ret_mask; 435 436 lp_exec_mask_update(mask); 437} 438 439 440static LLVMValueRef 441get_file_ptr(struct lp_build_tgsi_soa_context *bld, 442 unsigned file, 443 int index, 444 unsigned chan) 445{ 446 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 447 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS]; 448 LLVMValueRef var_of_array; 449 450 switch (file) { 451 case TGSI_FILE_TEMPORARY: 452 array_of_vars = bld->temps; 453 var_of_array = bld->temps_array; 454 break; 455 case TGSI_FILE_OUTPUT: 456 array_of_vars = bld->outputs; 457 var_of_array = bld->outputs_array; 458 break; 459 default: 460 assert(0); 461 return NULL; 462 } 463 464 assert(chan < 4); 465 466 if (bld->indirect_files & (1 << file)) { 467 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); 468 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) { 469 LLVMValueRef gep[2]; 470 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0); 471 gep[1] = lindex; 472 return LLVMBuildGEP(builder, var_of_array, gep, 2, ""); 473 } else { 474 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, ""); 475 } 476 } 477 else { 478 assert(index <= bld->bld_base.info->file_max[file]); 479 return array_of_vars[index][chan]; 480 } 481} 482 483 484/** 485 * Return pointer to a temporary register channel (src or dest). 486 * Note that indirect addressing cannot be handled here. 487 * \param index which temporary register 488 * \param chan which channel of the temp register. 489 */ 490LLVMValueRef 491lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld, 492 unsigned index, 493 unsigned chan) 494{ 495 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan); 496} 497 498/** 499 * Return pointer to a output register channel (src or dest). 500 * Note that indirect addressing cannot be handled here. 501 * \param index which output register 502 * \param chan which channel of the output register. 503 */ 504LLVMValueRef 505lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, 506 unsigned index, 507 unsigned chan) 508{ 509 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan); 510} 511 512/* 513 * If we have indirect addressing in outputs copy our alloca array 514 * to the outputs slots specified by the caller to make sure 515 * our outputs are delivered consistently via the same interface. 516 */ 517static void 518gather_outputs(struct lp_build_tgsi_soa_context * bld) 519{ 520 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 521 unsigned index, chan; 522 assert(bld->bld_base.info->num_outputs <= 523 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1); 524 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) { 525 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 526 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); 527 } 528 } 529 } 530} 531 532/** 533 * Gather vector. 534 * XXX the lp_build_gather() function should be capable of doing this 535 * with a little work. 536 */ 537static LLVMValueRef 538build_gather(struct lp_build_tgsi_context *bld_base, 539 LLVMValueRef base_ptr, 540 LLVMValueRef indexes, 541 LLVMValueRef overflow_mask, 542 LLVMValueRef indexes2) 543{ 544 struct gallivm_state *gallivm = bld_base->base.gallivm; 545 LLVMBuilderRef builder = gallivm->builder; 546 struct lp_build_context *uint_bld = &bld_base->uint_bld; 547 struct lp_build_context *bld = &bld_base->base; 548 LLVMValueRef res; 549 unsigned i; 550 551 if (indexes2) 552 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2)); 553 else 554 res = bld->undef; 555 /* 556 * overflow_mask is a vector telling us which channels 557 * in the vector overflowed. We use the overflow behavior for 558 * constant buffers which is defined as: 559 * Out of bounds access to constant buffer returns 0 in all 560 * components. Out of bounds behavior is always with respect 561 * to the size of the buffer bound at that slot. 562 */ 563 564 if (overflow_mask) { 565 /* 566 * We avoid per-element control flow here (also due to llvm going crazy, 567 * though I suspect it's better anyway since overflow is likely rare). 568 * Note that since we still fetch from buffers even if num_elements was 569 * zero (in this case we'll fetch from index zero) the jit func callers 570 * MUST provide valid fake constant buffers of size 4x32 (the values do 571 * not matter), otherwise we'd still need (not per element though) 572 * control flow. 573 */ 574 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes); 575 if (indexes2) 576 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2); 577 } 578 579 /* 580 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 581 */ 582 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) { 583 LLVMValueRef si, di; 584 LLVMValueRef index; 585 LLVMValueRef scalar_ptr, scalar; 586 587 di = lp_build_const_int32(bld->gallivm, i); 588 if (indexes2) 589 si = lp_build_const_int32(bld->gallivm, i >> 1); 590 else 591 si = di; 592 593 if (indexes2 && (i & 1)) { 594 index = LLVMBuildExtractElement(builder, 595 indexes2, si, ""); 596 } else { 597 index = LLVMBuildExtractElement(builder, 598 indexes, si, ""); 599 } 600 scalar_ptr = LLVMBuildGEP(builder, base_ptr, 601 &index, 1, "gather_ptr"); 602 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 603 604 res = LLVMBuildInsertElement(builder, res, scalar, di, ""); 605 } 606 607 if (overflow_mask) { 608 if (indexes2) { 609 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, ""); 610 overflow_mask = LLVMBuildSExt(builder, overflow_mask, 611 bld_base->dbl_bld.int_vec_type, ""); 612 res = lp_build_select(&bld_base->dbl_bld, overflow_mask, 613 bld_base->dbl_bld.zero, res); 614 } else 615 res = lp_build_select(bld, overflow_mask, bld->zero, res); 616 } 617 618 return res; 619} 620 621 622/** 623 * Scatter/store vector. 624 */ 625static void 626emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, 627 LLVMValueRef base_ptr, 628 LLVMValueRef indexes, 629 LLVMValueRef values, 630 struct lp_exec_mask *mask) 631{ 632 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 633 LLVMBuilderRef builder = gallivm->builder; 634 unsigned i; 635 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL; 636 637 /* 638 * Loop over elements of index_vec, store scalar value. 639 */ 640 for (i = 0; i < bld->bld_base.base.type.length; i++) { 641 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 642 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); 643 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); 644 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); 645 LLVMValueRef scalar_pred = pred ? 646 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 647 648 if (0) 649 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", 650 ii, val, index, scalar_ptr); 651 652 if (scalar_pred) { 653 LLVMValueRef real_val, dst_val; 654 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 655 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 656 LLVMBuildStore(builder, real_val, scalar_ptr); 657 } 658 else { 659 LLVMBuildStore(builder, val, scalar_ptr); 660 } 661 } 662} 663 664 665/** 666 * Read the current value of the ADDR register, convert the floats to 667 * ints, add the base index and return the vector of offsets. 668 * The offsets will be used to index into the constant buffer or 669 * temporary register file. 670 */ 671static LLVMValueRef 672get_indirect_index(struct lp_build_tgsi_soa_context *bld, 673 unsigned reg_file, unsigned reg_index, 674 const struct tgsi_ind_register *indirect_reg, 675 int index_limit) 676{ 677 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 678 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 679 /* always use X component of address register */ 680 unsigned swizzle = indirect_reg->Swizzle; 681 LLVMValueRef base; 682 LLVMValueRef rel; 683 LLVMValueRef max_index; 684 LLVMValueRef index; 685 686 assert(bld->indirect_files & (1 << reg_file)); 687 688 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index); 689 690 assert(swizzle < 4); 691 switch (indirect_reg->File) { 692 case TGSI_FILE_ADDRESS: 693 rel = LLVMBuildLoad(builder, 694 bld->addr[indirect_reg->Index][swizzle], 695 "load addr reg"); 696 /* ADDR LLVM values already have LLVM integer type. */ 697 break; 698 case TGSI_FILE_TEMPORARY: 699 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle); 700 rel = LLVMBuildLoad(builder, rel, "load temp reg"); 701 /* TEMP LLVM values always have LLVM float type, but for indirection, the 702 * value actually stored is expected to be an integer */ 703 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, ""); 704 break; 705 default: 706 assert(0); 707 rel = uint_bld->zero; 708 } 709 710 index = lp_build_add(uint_bld, base, rel); 711 712 /* 713 * emit_fetch_constant handles constant buffer overflow so this code 714 * is pointless for them. 715 * Furthermore the D3D10 spec in section 6.5 says: 716 * If the constant buffer bound to a slot is larger than the size 717 * declared in the shader for that slot, implementations are allowed 718 * to return incorrect data (not necessarily 0) for indices that are 719 * larger than the declared size but smaller than the buffer size. 720 */ 721 if (reg_file != TGSI_FILE_CONSTANT) { 722 assert(index_limit >= 0); 723 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, 724 uint_bld->type, index_limit); 725 726 assert(!uint_bld->type.sign); 727 index = lp_build_min(uint_bld, index, max_index); 728 } 729 730 return index; 731} 732 733static struct lp_build_context * 734stype_to_fetch(struct lp_build_tgsi_context * bld_base, 735 enum tgsi_opcode_type stype) 736{ 737 struct lp_build_context *bld_fetch; 738 739 switch (stype) { 740 case TGSI_TYPE_FLOAT: 741 case TGSI_TYPE_UNTYPED: 742 bld_fetch = &bld_base->base; 743 break; 744 case TGSI_TYPE_UNSIGNED: 745 bld_fetch = &bld_base->uint_bld; 746 break; 747 case TGSI_TYPE_SIGNED: 748 bld_fetch = &bld_base->int_bld; 749 break; 750 case TGSI_TYPE_DOUBLE: 751 bld_fetch = &bld_base->dbl_bld; 752 break; 753 case TGSI_TYPE_UNSIGNED64: 754 bld_fetch = &bld_base->uint64_bld; 755 break; 756 case TGSI_TYPE_SIGNED64: 757 bld_fetch = &bld_base->int64_bld; 758 break; 759 case TGSI_TYPE_VOID: 760 default: 761 assert(0); 762 bld_fetch = NULL; 763 break; 764 } 765 return bld_fetch; 766} 767 768static LLVMValueRef 769get_soa_array_offsets(struct lp_build_context *uint_bld, 770 LLVMValueRef indirect_index, 771 unsigned chan_index, 772 boolean need_perelement_offset) 773{ 774 struct gallivm_state *gallivm = uint_bld->gallivm; 775 LLVMValueRef chan_vec = 776 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index); 777 LLVMValueRef length_vec = 778 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length); 779 LLVMValueRef index_vec; 780 781 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 782 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 783 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 784 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 785 786 if (need_perelement_offset) { 787 LLVMValueRef pixel_offsets; 788 unsigned i; 789 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 790 pixel_offsets = uint_bld->undef; 791 for (i = 0; i < uint_bld->type.length; i++) { 792 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 793 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets, 794 ii, ii, ""); 795 } 796 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 797 } 798 return index_vec; 799} 800 801static LLVMValueRef 802emit_fetch_constant( 803 struct lp_build_tgsi_context * bld_base, 804 const struct tgsi_full_src_register * reg, 805 enum tgsi_opcode_type stype, 806 unsigned swizzle_in) 807{ 808 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 809 struct gallivm_state *gallivm = bld_base->base.gallivm; 810 LLVMBuilderRef builder = gallivm->builder; 811 struct lp_build_context *uint_bld = &bld_base->uint_bld; 812 unsigned dimension = 0; 813 LLVMValueRef consts_ptr; 814 LLVMValueRef num_consts; 815 LLVMValueRef res; 816 unsigned swizzle = swizzle_in & 0xffff; 817 818 /* XXX: Handle fetching xyzw components as a vector */ 819 assert(swizzle != ~0u); 820 821 if (reg->Register.Dimension) { 822 assert(!reg->Dimension.Indirect); 823 dimension = reg->Dimension.Index; 824 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS); 825 } 826 827 consts_ptr = bld->consts[dimension]; 828 num_consts = bld->consts_sizes[dimension]; 829 830 if (reg->Register.Indirect) { 831 LLVMValueRef indirect_index; 832 LLVMValueRef swizzle_vec = 833 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); 834 LLVMValueRef index_vec; /* index into the const buffer */ 835 LLVMValueRef overflow_mask; 836 LLVMValueRef index_vec2 = NULL; 837 838 indirect_index = get_indirect_index(bld, 839 reg->Register.File, 840 reg->Register.Index, 841 ®->Indirect, 842 bld->bld_base.info->file_max[reg->Register.File]); 843 844 /* All fetches are from the same constant buffer, so 845 * we need to propagate the size to a vector to do a 846 * vector comparison */ 847 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); 848 /* Construct a boolean vector telling us which channels 849 * overflow the bound constant buffer */ 850 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, 851 indirect_index, num_consts); 852 853 /* index_vec = indirect_index * 4 + swizzle */ 854 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 855 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 856 857 if (tgsi_type_is_64bit(stype)) { 858 LLVMValueRef swizzle_vec2; 859 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16); 860 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2); 861 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2); 862 } 863 /* Gather values from the constant buffer */ 864 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2); 865 } 866 else { 867 LLVMValueRef index; /* index into the const buffer */ 868 LLVMValueRef scalar, scalar_ptr; 869 struct lp_build_context *bld_broad = &bld_base->base; 870 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); 871 872 scalar_ptr = LLVMBuildGEP(builder, consts_ptr, 873 &index, 1, ""); 874 875 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) { 876 877 LLVMValueRef scalar2, scalar2_ptr; 878 LLVMValueRef shuffles[2]; 879 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16)); 880 881 scalar2_ptr = LLVMBuildGEP(builder, consts_ptr, 882 &index, 1, ""); 883 884 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 885 scalar2 = LLVMBuildLoad(builder, scalar2_ptr, ""); 886 shuffles[0] = lp_build_const_int32(gallivm, 0); 887 shuffles[1] = lp_build_const_int32(gallivm, 1); 888 889 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2)); 890 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], ""); 891 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], ""); 892 } else { 893 if (stype == TGSI_TYPE_DOUBLE) { 894 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0); 895 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, ""); 896 bld_broad = &bld_base->dbl_bld; 897 } else if (stype == TGSI_TYPE_UNSIGNED64) { 898 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); 899 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, ""); 900 bld_broad = &bld_base->uint64_bld; 901 } else if (stype == TGSI_TYPE_SIGNED64) { 902 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); 903 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, ""); 904 bld_broad = &bld_base->int64_bld; 905 } 906 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 907 res = lp_build_broadcast_scalar(bld_broad, scalar); 908 } 909 910 } 911 912 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) { 913 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 914 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 915 } 916 917 return res; 918} 919 920/** 921 * Fetch 64-bit values from two separate channels. 922 * 64-bit values are stored split across two channels, like xy and zw. 923 * This function creates a set of vec_length*2 floats, 924 * extracts the values from the two channels, 925 * puts them in the correct place, then casts to vec_length 64-bits. 926 */ 927static LLVMValueRef 928emit_fetch_64bit( 929 struct lp_build_tgsi_context * bld_base, 930 enum tgsi_opcode_type stype, 931 LLVMValueRef input, 932 LLVMValueRef input2) 933{ 934 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 935 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 936 LLVMBuilderRef builder = gallivm->builder; 937 LLVMValueRef res; 938 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 939 int i; 940 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)]; 941 int len = bld_base->base.type.length * 2; 942 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32))); 943 944 for (i = 0; i < bld_base->base.type.length * 2; i+=2) { 945 shuffles[i] = lp_build_const_int32(gallivm, i / 2); 946 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); 947 } 948 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); 949 950 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 951} 952 953static LLVMValueRef 954emit_fetch_immediate( 955 struct lp_build_tgsi_context * bld_base, 956 const struct tgsi_full_src_register * reg, 957 enum tgsi_opcode_type stype, 958 unsigned swizzle_in) 959{ 960 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 961 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 962 LLVMBuilderRef builder = gallivm->builder; 963 LLVMValueRef res = NULL; 964 unsigned swizzle = swizzle_in & 0xffff; 965 966 if (bld->use_immediates_array || reg->Register.Indirect) { 967 LLVMValueRef imms_array; 968 LLVMTypeRef fptr_type; 969 970 /* cast imms_array pointer to float* */ 971 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 972 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, ""); 973 974 if (reg->Register.Indirect) { 975 LLVMValueRef indirect_index; 976 LLVMValueRef index_vec; /* index into the immediate register array */ 977 LLVMValueRef index_vec2 = NULL; 978 indirect_index = get_indirect_index(bld, 979 reg->Register.File, 980 reg->Register.Index, 981 ®->Indirect, 982 bld->bld_base.info->file_max[reg->Register.File]); 983 /* 984 * Unlike for other reg classes, adding pixel offsets is unnecessary - 985 * immediates are stored as full vectors (FIXME??? - might be better 986 * to store them the same as constants) but all elements are the same 987 * in any case. 988 */ 989 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 990 indirect_index, 991 swizzle, 992 FALSE); 993 if (tgsi_type_is_64bit(stype)) 994 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, 995 indirect_index, 996 swizzle_in >> 16, 997 FALSE); 998 /* Gather values from the immediate register array */ 999 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2); 1000 } else { 1001 LLVMValueRef gep[2]; 1002 gep[0] = lp_build_const_int32(gallivm, 0); 1003 gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); 1004 LLVMValueRef imms_ptr = LLVMBuildGEP(builder, 1005 bld->imms_array, gep, 2, ""); 1006 res = LLVMBuildLoad(builder, imms_ptr, ""); 1007 1008 if (tgsi_type_is_64bit(stype)) { 1009 LLVMValueRef imms_ptr2; 1010 LLVMValueRef res2; 1011 gep[1] = lp_build_const_int32(gallivm, 1012 reg->Register.Index * 4 + (swizzle_in >> 16)); 1013 imms_ptr2 = LLVMBuildGEP(builder, 1014 bld->imms_array, gep, 2, ""); 1015 res2 = LLVMBuildLoad(builder, imms_ptr2, ""); 1016 res = emit_fetch_64bit(bld_base, stype, res, res2); 1017 } 1018 } 1019 } 1020 else { 1021 res = bld->immediates[reg->Register.Index][swizzle]; 1022 if (tgsi_type_is_64bit(stype)) 1023 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]); 1024 } 1025 1026 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) { 1027 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 1028 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 1029 } 1030 return res; 1031} 1032 1033static LLVMValueRef 1034emit_fetch_input( 1035 struct lp_build_tgsi_context * bld_base, 1036 const struct tgsi_full_src_register * reg, 1037 enum tgsi_opcode_type stype, 1038 unsigned swizzle_in) 1039{ 1040 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1041 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1042 LLVMBuilderRef builder = gallivm->builder; 1043 LLVMValueRef res; 1044 unsigned swizzle = swizzle_in & 0xffff; 1045 1046 if (reg->Register.Indirect) { 1047 LLVMValueRef indirect_index; 1048 LLVMValueRef index_vec; /* index into the input reg array */ 1049 LLVMValueRef index_vec2 = NULL; 1050 LLVMValueRef inputs_array; 1051 LLVMTypeRef fptr_type; 1052 1053 indirect_index = get_indirect_index(bld, 1054 reg->Register.File, 1055 reg->Register.Index, 1056 ®->Indirect, 1057 bld->bld_base.info->file_max[reg->Register.File]); 1058 1059 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1060 indirect_index, 1061 swizzle, 1062 TRUE); 1063 if (tgsi_type_is_64bit(stype)) { 1064 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, 1065 indirect_index, 1066 swizzle_in >> 16, 1067 TRUE); 1068 } 1069 /* cast inputs_array pointer to float* */ 1070 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1071 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, ""); 1072 1073 /* Gather values from the input register array */ 1074 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2); 1075 } else { 1076 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 1077 LLVMValueRef lindex = lp_build_const_int32(gallivm, 1078 reg->Register.Index * 4 + swizzle); 1079 LLVMValueRef input_ptr = LLVMBuildGEP(builder, 1080 bld->inputs_array, &lindex, 1, ""); 1081 1082 res = LLVMBuildLoad(builder, input_ptr, ""); 1083 if (tgsi_type_is_64bit(stype)) { 1084 LLVMValueRef lindex1; 1085 LLVMValueRef input_ptr2; 1086 LLVMValueRef res2; 1087 1088 lindex1 = lp_build_const_int32(gallivm, 1089 reg->Register.Index * 4 + (swizzle_in >> 16)); 1090 input_ptr2 = LLVMBuildGEP(builder, 1091 bld->inputs_array, &lindex1, 1, ""); 1092 res2 = LLVMBuildLoad(builder, input_ptr2, ""); 1093 res = emit_fetch_64bit(bld_base, stype, res, res2); 1094 } 1095 } 1096 else { 1097 res = bld->inputs[reg->Register.Index][swizzle]; 1098 if (tgsi_type_is_64bit(stype)) 1099 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]); 1100 } 1101 } 1102 1103 assert(res); 1104 1105 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) { 1106 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 1107 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 1108 } 1109 1110 return res; 1111} 1112 1113 1114static LLVMValueRef 1115emit_fetch_gs_input( 1116 struct lp_build_tgsi_context * bld_base, 1117 const struct tgsi_full_src_register * reg, 1118 enum tgsi_opcode_type stype, 1119 unsigned swizzle_in) 1120{ 1121 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1122 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1123 const struct tgsi_shader_info *info = bld->bld_base.info; 1124 LLVMBuilderRef builder = gallivm->builder; 1125 LLVMValueRef attrib_index = NULL; 1126 LLVMValueRef vertex_index = NULL; 1127 unsigned swizzle = swizzle_in & 0xffff; 1128 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle); 1129 LLVMValueRef res; 1130 1131 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) { 1132 /* This is really a system value not a regular input */ 1133 assert(!reg->Register.Indirect); 1134 assert(!reg->Dimension.Indirect); 1135 res = bld->system_values.prim_id; 1136 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) { 1137 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); 1138 } 1139 return res; 1140 } 1141 1142 if (reg->Register.Indirect) { 1143 /* 1144 * XXX: this is possibly not quite the right value, since file_max may be 1145 * larger than the max attrib index, due to it being the max of declared 1146 * inputs AND the max vertices per prim (which is 6 for tri adj). 1147 * It should however be safe to use (since we always allocate 1148 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit). 1149 */ 1150 int index_limit = info->file_max[reg->Register.File]; 1151 attrib_index = get_indirect_index(bld, 1152 reg->Register.File, 1153 reg->Register.Index, 1154 ®->Indirect, 1155 index_limit); 1156 } else { 1157 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); 1158 } 1159 1160 if (reg->Dimension.Indirect) { 1161 /* 1162 * A fixed 6 should do as well (which is what we allocate). 1163 */ 1164 int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]); 1165 vertex_index = get_indirect_index(bld, 1166 reg->Register.File, 1167 reg->Dimension.Index, 1168 ®->DimIndirect, 1169 index_limit); 1170 } else { 1171 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); 1172 } 1173 1174 res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base, 1175 reg->Dimension.Indirect, 1176 vertex_index, 1177 reg->Register.Indirect, 1178 attrib_index, 1179 swizzle_index); 1180 1181 assert(res); 1182 if (tgsi_type_is_64bit(stype)) { 1183 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16); 1184 LLVMValueRef res2; 1185 res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base, 1186 reg->Dimension.Indirect, 1187 vertex_index, 1188 reg->Register.Indirect, 1189 attrib_index, 1190 swizzle_index); 1191 assert(res2); 1192 res = emit_fetch_64bit(bld_base, stype, res, res2); 1193 } else if (stype == TGSI_TYPE_UNSIGNED) { 1194 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 1195 } else if (stype == TGSI_TYPE_SIGNED) { 1196 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 1197 } 1198 1199 return res; 1200} 1201 1202static LLVMValueRef 1203emit_fetch_tcs_input( 1204 struct lp_build_tgsi_context * bld_base, 1205 const struct tgsi_full_src_register * reg, 1206 enum tgsi_opcode_type stype, 1207 unsigned swizzle_in) 1208{ 1209 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1210 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1211 const struct tgsi_shader_info *info = bld->bld_base.info; 1212 LLVMBuilderRef builder = gallivm->builder; 1213 LLVMValueRef attrib_index = NULL; 1214 LLVMValueRef vertex_index = NULL; 1215 unsigned swizzle = swizzle_in & 0xffff; 1216 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle); 1217 LLVMValueRef res; 1218 1219 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) { 1220 /* This is really a system value not a regular input */ 1221 assert(!reg->Register.Indirect); 1222 assert(!reg->Dimension.Indirect); 1223 res = bld->system_values.prim_id; 1224 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) { 1225 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); 1226 } 1227 return res; 1228 } 1229 1230 if (reg->Register.Indirect) { 1231 int index_limit = info->file_max[reg->Register.File]; 1232 attrib_index = get_indirect_index(bld, 1233 reg->Register.File, 1234 reg->Register.Index, 1235 ®->Indirect, 1236 index_limit); 1237 } else { 1238 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); 1239 } 1240 1241 if (reg->Dimension.Indirect) { 1242 vertex_index = get_indirect_index(bld, 1243 reg->Register.File, 1244 reg->Dimension.Index, 1245 ®->DimIndirect, 1246 PIPE_MAX_SHADER_INPUTS); 1247 } else { 1248 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); 1249 } 1250 1251 // TCS can read from its own outputs 1252 if (reg->Register.File == TGSI_FILE_OUTPUT) { 1253 res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base, 1254 reg->Dimension.Indirect, 1255 vertex_index, 1256 reg->Register.Indirect, 1257 attrib_index, 1258 FALSE, 1259 swizzle_index, 1260 bld_base->info->output_semantic_name[reg->Register.Index]); 1261 } else { 1262 res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base, 1263 reg->Dimension.Indirect, 1264 vertex_index, 1265 reg->Register.Indirect, 1266 attrib_index, 1267 FALSE, 1268 swizzle_index); 1269 } 1270 1271 1272 assert(res); 1273 if (tgsi_type_is_64bit(stype)) { 1274 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16); 1275 LLVMValueRef res2; 1276 if (reg->Register.File == TGSI_FILE_OUTPUT) { 1277 res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base, 1278 reg->Dimension.Indirect, 1279 vertex_index, 1280 reg->Register.Indirect, 1281 attrib_index, 1282 FALSE, 1283 swizzle_index, 1284 bld_base->info->output_semantic_name[reg->Register.Index]); 1285 } else { 1286 res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base, 1287 reg->Dimension.Indirect, 1288 vertex_index, 1289 reg->Register.Indirect, 1290 attrib_index, 1291 FALSE, 1292 swizzle_index); 1293 } 1294 assert(res2); 1295 res = emit_fetch_64bit(bld_base, stype, res, res2); 1296 } else if (stype == TGSI_TYPE_UNSIGNED) { 1297 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 1298 } else if (stype == TGSI_TYPE_SIGNED) { 1299 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 1300 } 1301 1302 return res; 1303} 1304 1305static LLVMValueRef 1306emit_fetch_tes_input( 1307 struct lp_build_tgsi_context * bld_base, 1308 const struct tgsi_full_src_register * reg, 1309 enum tgsi_opcode_type stype, 1310 unsigned swizzle_in) 1311{ 1312 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1313 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1314 const struct tgsi_shader_info *info = bld->bld_base.info; 1315 LLVMBuilderRef builder = gallivm->builder; 1316 LLVMValueRef attrib_index = NULL; 1317 LLVMValueRef vertex_index = NULL; 1318 unsigned swizzle = swizzle_in & 0xffff; 1319 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle); 1320 LLVMValueRef res; 1321 1322 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) { 1323 /* This is really a system value not a regular input */ 1324 assert(!reg->Register.Indirect); 1325 assert(!reg->Dimension.Indirect); 1326 res = bld->system_values.prim_id; 1327 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) { 1328 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); 1329 } 1330 return res; 1331 } 1332 1333 if (reg->Register.Indirect) { 1334 int index_limit = info->file_max[reg->Register.File]; 1335 attrib_index = get_indirect_index(bld, 1336 reg->Register.File, 1337 reg->Register.Index, 1338 ®->Indirect, 1339 index_limit); 1340 } else { 1341 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); 1342 } 1343 1344 if (reg->Dimension.Indirect) { 1345 vertex_index = get_indirect_index(bld, 1346 reg->Register.File, 1347 reg->Dimension.Index, 1348 ®->DimIndirect, 1349 PIPE_MAX_SHADER_INPUTS); 1350 } else { 1351 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); 1352 } 1353 1354 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) { 1355 res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base, 1356 reg->Register.Indirect, 1357 attrib_index, 1358 swizzle_index); 1359 } else { 1360 res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base, 1361 reg->Dimension.Indirect, 1362 vertex_index, 1363 reg->Register.Indirect, 1364 attrib_index, 1365 FALSE, 1366 swizzle_index); 1367 } 1368 1369 assert(res); 1370 if (tgsi_type_is_64bit(stype)) { 1371 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16); 1372 LLVMValueRef res2; 1373 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) { 1374 res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base, 1375 reg->Register.Indirect, 1376 attrib_index, 1377 swizzle_index); 1378 } 1379 else { 1380 res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base, 1381 reg->Dimension.Indirect, 1382 vertex_index, 1383 reg->Register.Indirect, 1384 attrib_index, 1385 FALSE, 1386 swizzle_index); 1387 } 1388 assert(res2); 1389 res = emit_fetch_64bit(bld_base, stype, res, res2); 1390 } else if (stype == TGSI_TYPE_UNSIGNED) { 1391 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 1392 } else if (stype == TGSI_TYPE_SIGNED) { 1393 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 1394 } 1395 1396 return res; 1397} 1398 1399 1400 1401static LLVMValueRef 1402emit_fetch_temporary( 1403 struct lp_build_tgsi_context * bld_base, 1404 const struct tgsi_full_src_register * reg, 1405 enum tgsi_opcode_type stype, 1406 unsigned swizzle_in) 1407{ 1408 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1409 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1410 LLVMBuilderRef builder = gallivm->builder; 1411 LLVMValueRef res; 1412 unsigned swizzle = swizzle_in & 0xffff; 1413 1414 if (reg->Register.Indirect) { 1415 LLVMValueRef indirect_index; 1416 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */ 1417 LLVMValueRef temps_array; 1418 LLVMTypeRef fptr_type; 1419 1420 indirect_index = get_indirect_index(bld, 1421 reg->Register.File, 1422 reg->Register.Index, 1423 ®->Indirect, 1424 bld->bld_base.info->file_max[reg->Register.File]); 1425 1426 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1427 indirect_index, 1428 swizzle, 1429 TRUE); 1430 if (tgsi_type_is_64bit(stype)) { 1431 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, 1432 indirect_index, 1433 swizzle_in >> 16, 1434 TRUE); 1435 } 1436 1437 /* cast temps_array pointer to float* */ 1438 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1439 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); 1440 1441 /* Gather values from the temporary register array */ 1442 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2); 1443 } 1444 else { 1445 LLVMValueRef temp_ptr; 1446 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); 1447 res = LLVMBuildLoad(builder, temp_ptr, ""); 1448 1449 if (tgsi_type_is_64bit(stype)) { 1450 LLVMValueRef temp_ptr2, res2; 1451 1452 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16); 1453 res2 = LLVMBuildLoad(builder, temp_ptr2, ""); 1454 res = emit_fetch_64bit(bld_base, stype, res, res2); 1455 } 1456 } 1457 1458 if (stype == TGSI_TYPE_SIGNED || 1459 stype == TGSI_TYPE_UNSIGNED || 1460 stype == TGSI_TYPE_DOUBLE || 1461 stype == TGSI_TYPE_SIGNED64 || 1462 stype == TGSI_TYPE_UNSIGNED64) { 1463 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 1464 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 1465 } 1466 1467 return res; 1468} 1469 1470static LLVMValueRef 1471emit_fetch_system_value( 1472 struct lp_build_tgsi_context * bld_base, 1473 const struct tgsi_full_src_register * reg, 1474 enum tgsi_opcode_type stype, 1475 unsigned swizzle_in) 1476{ 1477 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1478 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1479 const struct tgsi_shader_info *info = bld->bld_base.info; 1480 LLVMBuilderRef builder = gallivm->builder; 1481 LLVMValueRef res; 1482 enum tgsi_opcode_type atype; // Actual type of the value 1483 unsigned swizzle = swizzle_in & 0xffff; 1484 1485 assert(!reg->Register.Indirect); 1486 1487 switch (info->system_value_semantic_name[reg->Register.Index]) { 1488 case TGSI_SEMANTIC_INSTANCEID: 1489 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id); 1490 atype = TGSI_TYPE_UNSIGNED; 1491 break; 1492 1493 case TGSI_SEMANTIC_VERTEXID: 1494 res = bld->system_values.vertex_id; 1495 atype = TGSI_TYPE_UNSIGNED; 1496 break; 1497 1498 case TGSI_SEMANTIC_VERTEXID_NOBASE: 1499 res = bld->system_values.vertex_id_nobase; 1500 atype = TGSI_TYPE_UNSIGNED; 1501 break; 1502 1503 case TGSI_SEMANTIC_BASEVERTEX: 1504 res = bld->system_values.basevertex; 1505 atype = TGSI_TYPE_UNSIGNED; 1506 break; 1507 1508 case TGSI_SEMANTIC_BASEINSTANCE: 1509 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance); 1510 atype = TGSI_TYPE_UNSIGNED; 1511 break; 1512 1513 case TGSI_SEMANTIC_PRIMID: 1514 res = bld->system_values.prim_id; 1515 atype = TGSI_TYPE_UNSIGNED; 1516 break; 1517 1518 case TGSI_SEMANTIC_INVOCATIONID: 1519 if (info->processor == PIPE_SHADER_TESS_CTRL) 1520 res = bld->system_values.invocation_id; 1521 else 1522 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id); 1523 atype = TGSI_TYPE_UNSIGNED; 1524 break; 1525 1526 case TGSI_SEMANTIC_HELPER_INVOCATION: 1527 res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), ""); 1528 atype = TGSI_TYPE_UNSIGNED; 1529 break; 1530 1531 case TGSI_SEMANTIC_THREAD_ID: 1532 res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, ""); 1533 atype = TGSI_TYPE_UNSIGNED; 1534 break; 1535 1536 case TGSI_SEMANTIC_BLOCK_ID: 1537 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle)); 1538 atype = TGSI_TYPE_UNSIGNED; 1539 break; 1540 1541 case TGSI_SEMANTIC_GRID_SIZE: 1542 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle)); 1543 atype = TGSI_TYPE_UNSIGNED; 1544 break; 1545 1546 case TGSI_SEMANTIC_TESSCOORD: 1547 { 1548 LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) }; 1549 LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed"); 1550 res = LLVMBuildLoad(builder, array_indexed, "tess_coord"); 1551 } 1552 atype = TGSI_TYPE_FLOAT; 1553 break; 1554 1555 case TGSI_SEMANTIC_FACE: 1556 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing); 1557 atype = TGSI_TYPE_UNSIGNED; 1558 break; 1559 1560 case TGSI_SEMANTIC_DRAWID: 1561 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id); 1562 atype = TGSI_TYPE_UNSIGNED; 1563 break; 1564 1565 case TGSI_SEMANTIC_TESSOUTER: 1566 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type, 1567 bld->system_values.tess_outer, 1568 lp_build_const_int32(gallivm, swizzle_in)); 1569 atype = TGSI_TYPE_FLOAT; 1570 break; 1571 1572 case TGSI_SEMANTIC_TESSINNER: 1573 res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type, 1574 bld->system_values.tess_inner, 1575 lp_build_const_int32(gallivm, swizzle_in)); 1576 atype = TGSI_TYPE_FLOAT; 1577 break; 1578 1579 case TGSI_SEMANTIC_VERTICESIN: 1580 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in); 1581 atype = TGSI_TYPE_UNSIGNED; 1582 break; 1583 1584 default: 1585 assert(!"unexpected semantic in emit_fetch_system_value"); 1586 res = bld_base->base.zero; 1587 atype = TGSI_TYPE_FLOAT; 1588 break; 1589 } 1590 1591 if (atype != stype) { 1592 if (stype == TGSI_TYPE_FLOAT) { 1593 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); 1594 } else if (stype == TGSI_TYPE_UNSIGNED) { 1595 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 1596 } else if (stype == TGSI_TYPE_SIGNED) { 1597 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 1598 } 1599 } 1600 1601 return res; 1602} 1603 1604/** 1605 * Register fetch with derivatives. 1606 */ 1607static void 1608emit_fetch_deriv( 1609 struct lp_build_tgsi_soa_context *bld, 1610 LLVMValueRef src, 1611 LLVMValueRef *res, 1612 LLVMValueRef *ddx, 1613 LLVMValueRef *ddy) 1614{ 1615 if (res) 1616 *res = src; 1617 1618 /* TODO: use interpolation coeffs for inputs */ 1619 1620 if (ddx) 1621 *ddx = lp_build_ddx(&bld->bld_base.base, src); 1622 1623 if (ddy) 1624 *ddy = lp_build_ddy(&bld->bld_base.base, src); 1625} 1626 1627/** 1628 * store an array of vec-length 64-bit into two arrays of vec_length floats 1629 * i.e. 1630 * value is d0, d1, d2, d3 etc. 1631 * each 64-bit has high and low pieces x, y 1632 * so gets stored into the separate channels as: 1633 * chan_ptr = d0.x, d1.x, d2.x, d3.x 1634 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y 1635 */ 1636static void 1637emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base, 1638 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2, 1639 LLVMValueRef value) 1640{ 1641 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1642 struct gallivm_state *gallivm = bld_base->base.gallivm; 1643 LLVMBuilderRef builder = gallivm->builder; 1644 struct lp_build_context *float_bld = &bld_base->base; 1645 unsigned i; 1646 LLVMValueRef temp, temp2; 1647 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32]; 1648 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32]; 1649 1650 for (i = 0; i < bld_base->base.type.length; i++) { 1651 shuffles[i] = lp_build_const_int32(gallivm, i * 2); 1652 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1); 1653 } 1654 1655 temp = LLVMBuildShuffleVector(builder, value, 1656 LLVMGetUndef(LLVMTypeOf(value)), 1657 LLVMConstVector(shuffles, 1658 bld_base->base.type.length), 1659 ""); 1660 temp2 = LLVMBuildShuffleVector(builder, value, 1661 LLVMGetUndef(LLVMTypeOf(value)), 1662 LLVMConstVector(shuffles2, 1663 bld_base->base.type.length), 1664 ""); 1665 1666 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr); 1667 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2); 1668} 1669 1670static void 1671emit_store_output(struct lp_build_tgsi_context *bld_base, 1672 enum tgsi_opcode_type dtype, 1673 const struct tgsi_full_dst_register *reg, 1674 unsigned index, 1675 unsigned chan_index, 1676 LLVMValueRef indirect_index, 1677 LLVMValueRef value) 1678{ 1679 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1680 struct gallivm_state *gallivm = bld_base->base.gallivm; 1681 LLVMBuilderRef builder = gallivm->builder; 1682 struct lp_build_context *float_bld = &bld_base->base; 1683 1684 /* Outputs are always stored as floats */ 1685 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); 1686 1687 if (reg->Register.Indirect) { 1688 LLVMValueRef index_vec; /* indexes into the output registers */ 1689 LLVMValueRef outputs_array; 1690 LLVMTypeRef fptr_type; 1691 1692 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1693 indirect_index, 1694 chan_index, 1695 TRUE); 1696 1697 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1698 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, ""); 1699 1700 /* Scatter store values into output registers */ 1701 emit_mask_scatter(bld, outputs_array, index_vec, value, 1702 &bld->exec_mask); 1703 } 1704 else { 1705 assert(LLVMTypeOf(value) == float_bld->vec_type); 1706 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, 1707 chan_index); 1708 1709 if (tgsi_type_is_64bit(dtype)) { 1710 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index, 1711 chan_index + 1); 1712 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2, 1713 value); 1714 } else 1715 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr); 1716 } 1717} 1718 1719static void 1720emit_store_tcs_output(struct lp_build_tgsi_context *bld_base, 1721 enum tgsi_opcode_type dtype, 1722 const struct tgsi_full_dst_register *reg, 1723 unsigned index, 1724 unsigned chan_index, 1725 LLVMValueRef indirect_index, 1726 LLVMValueRef value) 1727{ 1728 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1729 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1730 const struct tgsi_shader_info *info = bld->bld_base.info; 1731 LLVMValueRef attrib_index = NULL; 1732 LLVMValueRef vertex_index = NULL; 1733 LLVMValueRef channel_index = NULL; 1734 1735 if (reg->Register.Indirect) { 1736 /* 1737 * XXX: this is possibly not quite the right value, since file_max may be 1738 * larger than the max attrib index, due to it being the max of declared 1739 * inputs AND the max vertices per prim (which is 6 for tri adj). 1740 * It should however be safe to use (since we always allocate 1741 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit). 1742 */ 1743 int index_limit = info->file_max[reg->Register.File]; 1744 attrib_index = get_indirect_index(bld, 1745 reg->Register.File, 1746 reg->Register.Index, 1747 ®->Indirect, 1748 index_limit); 1749 } else { 1750 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); 1751 } 1752 1753 if (reg->Dimension.Indirect) { 1754 vertex_index = get_indirect_index(bld, 1755 reg->Register.File, 1756 reg->Dimension.Index, 1757 ®->DimIndirect, 1758 PIPE_MAX_SHADER_OUTPUTS); 1759 } else { 1760 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); 1761 } 1762 1763 channel_index = lp_build_const_int32(gallivm, chan_index); 1764 1765 assert(bld->tcs_iface->emit_store_output); 1766 bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base, 1767 bld_base->info->output_semantic_name[reg->Register.Index], 1768 reg->Dimension.Indirect, 1769 vertex_index, 1770 reg->Register.Indirect, 1771 attrib_index, 1772 false, 1773 channel_index, 1774 value, 1775 mask_vec(bld_base)); 1776} 1777 1778static void 1779emit_store_temp(struct lp_build_tgsi_context *bld_base, 1780 enum tgsi_opcode_type dtype, 1781 const struct tgsi_full_dst_register *reg, 1782 unsigned index, 1783 unsigned chan_index, 1784 LLVMValueRef indirect_index, 1785 LLVMValueRef value) 1786{ 1787 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1788 struct gallivm_state *gallivm = bld_base->base.gallivm; 1789 LLVMBuilderRef builder = gallivm->builder; 1790 struct lp_build_context *float_bld = &bld_base->base; 1791 1792 /* Temporaries are always stored as floats */ 1793 if (!tgsi_type_is_64bit(dtype)) 1794 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); 1795 else 1796 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), ""); 1797 1798 if (reg->Register.Indirect) { 1799 LLVMValueRef index_vec; /* indexes into the temp registers */ 1800 LLVMValueRef temps_array; 1801 LLVMTypeRef fptr_type; 1802 1803 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1804 indirect_index, 1805 chan_index, 1806 TRUE); 1807 1808 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1809 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); 1810 1811 /* Scatter store values into temp registers */ 1812 emit_mask_scatter(bld, temps_array, index_vec, value, 1813 &bld->exec_mask); 1814 } 1815 else { 1816 LLVMValueRef temp_ptr; 1817 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index); 1818 1819 if (tgsi_type_is_64bit(dtype)) { 1820 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld, 1821 reg->Register.Index, 1822 chan_index + 1); 1823 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2, 1824 value); 1825 } 1826 else 1827 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr); 1828 } 1829} 1830 1831static void 1832emit_store_address(struct lp_build_tgsi_context *bld_base, 1833 enum tgsi_opcode_type dtype, 1834 const struct tgsi_full_dst_register *reg, 1835 unsigned index, 1836 unsigned chan_index, 1837 LLVMValueRef indirect_index, 1838 LLVMValueRef value) 1839{ 1840 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1841 struct gallivm_state *gallivm = bld_base->base.gallivm; 1842 LLVMBuilderRef builder = gallivm->builder; 1843 struct lp_build_context *int_bld = &bld_base->int_bld; 1844 1845 assert(dtype == TGSI_TYPE_SIGNED); 1846 assert(LLVMTypeOf(value) == int_bld->vec_type); 1847 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, ""); 1848 lp_exec_mask_store(&bld->exec_mask, int_bld, value, 1849 bld->addr[reg->Register.Index][chan_index]); 1850} 1851 1852/** 1853 * Register store. 1854 */ 1855static void 1856emit_store_chan( 1857 struct lp_build_tgsi_context *bld_base, 1858 const struct tgsi_full_instruction *inst, 1859 unsigned index, 1860 unsigned chan_index, 1861 LLVMValueRef value) 1862{ 1863 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1864 struct gallivm_state *gallivm = bld_base->base.gallivm; 1865 LLVMBuilderRef builder = gallivm->builder; 1866 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 1867 struct lp_build_context *float_bld = &bld_base->base; 1868 LLVMValueRef indirect_index = NULL; 1869 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index); 1870 1871 /* 1872 * Apply saturation. 1873 * 1874 * It is always assumed to be float. 1875 */ 1876 if (inst->Instruction.Saturate) { 1877 assert(dtype == TGSI_TYPE_FLOAT || 1878 dtype == TGSI_TYPE_UNTYPED); 1879 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); 1880 value = lp_build_clamp_zero_one_nanzero(float_bld, value); 1881 } 1882 1883 if (reg->Register.Indirect) { 1884 /* 1885 * Currently the mesa/st doesn't generate indirect stores 1886 * to 64-bit values, it normally uses MOV to do indirect stores. 1887 */ 1888 assert(!tgsi_type_is_64bit(dtype)); 1889 indirect_index = get_indirect_index(bld, 1890 reg->Register.File, 1891 reg->Register.Index, 1892 ®->Indirect, 1893 bld->bld_base.info->file_max[reg->Register.File]); 1894 } else { 1895 assert(reg->Register.Index <= 1896 bld_base->info->file_max[reg->Register.File]); 1897 } 1898 1899 if (DEBUG_EXECUTION) { 1900 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value); 1901 } 1902 1903 assert(bld_base->emit_store_reg_funcs[reg->Register.File]); 1904 bld_base->emit_store_reg_funcs[reg->Register.File](bld_base, 1905 dtype, 1906 reg, 1907 index, 1908 chan_index, 1909 indirect_index, 1910 value); 1911 1912 (void)dtype; 1913} 1914 1915/* 1916 * Called at the beginning of the translation of each TGSI instruction, to 1917 * emit some debug code. 1918 */ 1919static void 1920emit_debug( 1921 struct lp_build_tgsi_context * bld_base, 1922 const struct tgsi_full_instruction * inst, 1923 const struct tgsi_opcode_info * info) 1924 1925{ 1926 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1927 1928 if (DEBUG_EXECUTION) { 1929 /* 1930 * Dump the TGSI instruction. 1931 */ 1932 1933 struct gallivm_state *gallivm = bld_base->base.gallivm; 1934 char buf[512]; 1935 buf[0] = '$'; 1936 buf[1] = ' '; 1937 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2); 1938 lp_build_printf(gallivm, buf); 1939 1940 /* Dump the execution mask. 1941 */ 1942 if (bld->exec_mask.has_mask) { 1943 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask); 1944 } 1945 } 1946} 1947 1948static void 1949emit_store( 1950 struct lp_build_tgsi_context * bld_base, 1951 const struct tgsi_full_instruction * inst, 1952 const struct tgsi_opcode_info * info, 1953 unsigned index, 1954 LLVMValueRef dst[4]) 1955 1956{ 1957 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index); 1958 1959 unsigned writemask = inst->Dst[index].Register.WriteMask; 1960 while (writemask) { 1961 unsigned chan_index = u_bit_scan(&writemask); 1962 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) 1963 continue; 1964 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]); 1965 } 1966} 1967 1968static unsigned 1969tgsi_to_pipe_tex_target(unsigned tgsi_target) 1970{ 1971 switch (tgsi_target) { 1972 case TGSI_TEXTURE_BUFFER: 1973 return PIPE_BUFFER; 1974 case TGSI_TEXTURE_1D: 1975 case TGSI_TEXTURE_SHADOW1D: 1976 return PIPE_TEXTURE_1D; 1977 case TGSI_TEXTURE_2D: 1978 case TGSI_TEXTURE_SHADOW2D: 1979 case TGSI_TEXTURE_2D_MSAA: 1980 return PIPE_TEXTURE_2D; 1981 case TGSI_TEXTURE_3D: 1982 return PIPE_TEXTURE_3D; 1983 case TGSI_TEXTURE_CUBE: 1984 case TGSI_TEXTURE_SHADOWCUBE: 1985 return PIPE_TEXTURE_CUBE; 1986 case TGSI_TEXTURE_RECT: 1987 case TGSI_TEXTURE_SHADOWRECT: 1988 return PIPE_TEXTURE_RECT; 1989 case TGSI_TEXTURE_1D_ARRAY: 1990 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1991 return PIPE_TEXTURE_1D_ARRAY; 1992 case TGSI_TEXTURE_2D_ARRAY: 1993 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1994 case TGSI_TEXTURE_2D_ARRAY_MSAA: 1995 return PIPE_TEXTURE_2D_ARRAY; 1996 case TGSI_TEXTURE_CUBE_ARRAY: 1997 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1998 return PIPE_TEXTURE_CUBE_ARRAY; 1999 default: 2000 assert(0); 2001 return PIPE_BUFFER; 2002 } 2003} 2004 2005 2006static enum lp_sampler_lod_property 2007lp_build_lod_property( 2008 struct lp_build_tgsi_context *bld_base, 2009 const struct tgsi_full_instruction *inst, 2010 unsigned src_op) 2011{ 2012 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 2013 enum lp_sampler_lod_property lod_property; 2014 2015 /* 2016 * Not much we can do here. We could try catching inputs declared 2017 * with constant interpolation but not sure it's worth it - since for 2018 * TEX opcodes as well as FETCH/LD the lod comes from same reg as 2019 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just 2020 * like the constant/immediate recognition below. 2021 * What seems to be of more value would be to recognize temps holding 2022 * broadcasted scalars but no way we can do it. 2023 * Tried asking llvm but without any success (using LLVMIsConstant 2024 * even though this isn't exactly what we'd need), even as simple as 2025 * IMM[0] UINT32 (0,-1,0,0) 2026 * MOV TEMP[0] IMM[0].yyyy 2027 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0] 2028 * doesn't work. 2029 * This means there's ZERO chance this will ever catch a scalar lod 2030 * with traditional tex opcodes as well as texel fetches, since the lod 2031 * comes from the same reg as coords (except some test shaders using 2032 * constant coords maybe). 2033 * There's at least hope for sample opcodes as well as size queries. 2034 */ 2035 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ || 2036 reg->Register.File == TGSI_FILE_CONSTANT || 2037 reg->Register.File == TGSI_FILE_IMMEDIATE) { 2038 lod_property = LP_SAMPLER_LOD_SCALAR; 2039 } 2040 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) { 2041 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) { 2042 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2043 } 2044 else { 2045 lod_property = LP_SAMPLER_LOD_PER_QUAD; 2046 } 2047 } 2048 else { 2049 /* never use scalar (per-quad) lod the results are just too wrong. */ 2050 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2051 } 2052 return lod_property; 2053} 2054 2055 2056/** 2057 * High-level instruction translators. 2058 */ 2059 2060static void 2061emit_tex( struct lp_build_tgsi_soa_context *bld, 2062 const struct tgsi_full_instruction *inst, 2063 enum lp_build_tex_modifier modifier, 2064 LLVMValueRef *texel, 2065 unsigned sampler_reg, 2066 enum lp_sampler_op_type sampler_op) 2067{ 2068 unsigned unit = inst->Src[sampler_reg].Register.Index; 2069 LLVMValueRef oow = NULL; 2070 LLVMValueRef lod = NULL; 2071 LLVMValueRef coords[5]; 2072 LLVMValueRef offsets[3] = { NULL }; 2073 struct lp_derivatives derivs; 2074 struct lp_sampler_params params; 2075 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; 2076 unsigned num_derivs, num_offsets, i; 2077 unsigned shadow_coord = 0; 2078 unsigned layer_coord = 0; 2079 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT; 2080 2081 memset(¶ms, 0, sizeof(params)); 2082 2083 if (!bld->sampler) { 2084 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 2085 for (i = 0; i < 4; i++) { 2086 texel[i] = bld->bld_base.base.undef; 2087 } 2088 return; 2089 } 2090 2091 switch (inst->Texture.Texture) { 2092 case TGSI_TEXTURE_1D_ARRAY: 2093 layer_coord = 1; 2094 FALLTHROUGH; 2095 case TGSI_TEXTURE_1D: 2096 num_offsets = 1; 2097 num_derivs = 1; 2098 break; 2099 case TGSI_TEXTURE_2D_ARRAY: 2100 layer_coord = 2; 2101 FALLTHROUGH; 2102 case TGSI_TEXTURE_2D: 2103 case TGSI_TEXTURE_RECT: 2104 num_offsets = 2; 2105 num_derivs = 2; 2106 break; 2107 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2108 layer_coord = 1; 2109 FALLTHROUGH; 2110 case TGSI_TEXTURE_SHADOW1D: 2111 shadow_coord = 2; 2112 num_offsets = 1; 2113 num_derivs = 1; 2114 break; 2115 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2116 layer_coord = 2; 2117 shadow_coord = 3; 2118 num_offsets = 2; 2119 num_derivs = 2; 2120 break; 2121 case TGSI_TEXTURE_SHADOW2D: 2122 case TGSI_TEXTURE_SHADOWRECT: 2123 shadow_coord = 2; 2124 num_offsets = 2; 2125 num_derivs = 2; 2126 break; 2127 case TGSI_TEXTURE_CUBE: 2128 num_offsets = 2; 2129 num_derivs = 3; 2130 break; 2131 case TGSI_TEXTURE_3D: 2132 num_offsets = 3; 2133 num_derivs = 3; 2134 break; 2135 case TGSI_TEXTURE_SHADOWCUBE: 2136 shadow_coord = 3; 2137 num_offsets = 2; 2138 num_derivs = 3; 2139 break; 2140 case TGSI_TEXTURE_CUBE_ARRAY: 2141 num_offsets = 2; 2142 num_derivs = 3; 2143 layer_coord = 3; 2144 break; 2145 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 2146 num_offsets = 2; 2147 num_derivs = 3; 2148 layer_coord = 3; 2149 shadow_coord = 4; /* shadow coord special different reg */ 2150 break; 2151 case TGSI_TEXTURE_2D_MSAA: 2152 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2153 default: 2154 assert(0); 2155 return; 2156 } 2157 2158 /* Note lod and especially projected are illegal in a LOT of cases */ 2159 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || 2160 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2161 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) { 2162 lod = bld->bld_base.base.zero; 2163 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 2164 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) { 2165 /* note that shadow cube array with bias/explicit lod does not exist */ 2166 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0); 2167 } 2168 else { 2169 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); 2170 } 2171 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 2172 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT; 2173 } 2174 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2175 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2176 } 2177 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2178 } 2179 2180 if (sampler_op == LP_SAMPLER_OP_GATHER) { 2181 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX; 2182 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT); 2183 } 2184 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 2185 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); 2186 oow = lp_build_rcp(&bld->bld_base.base, oow); 2187 } 2188 2189 for (i = 0; i < num_derivs; i++) { 2190 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); 2191 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 2192 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow); 2193 } 2194 for (i = num_derivs; i < 5; i++) { 2195 coords[i] = bld->bld_base.base.undef; 2196 } 2197 2198 /* Layer coord always goes into 3rd slot, except for cube map arrays */ 2199 if (layer_coord) { 2200 if (layer_coord == 3) { 2201 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2202 } 2203 else { 2204 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2205 } 2206 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 2207 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow); 2208 } 2209 /* Shadow coord occupies always 5th slot. */ 2210 if (shadow_coord) { 2211 sample_key |= LP_SAMPLER_SHADOW; 2212 if (shadow_coord == 4) { 2213 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0); 2214 } 2215 else { 2216 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord); 2217 } 2218 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 2219 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow); 2220 } 2221 2222 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 2223 unsigned dim; 2224 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT; 2225 for (dim = 0; dim < num_derivs; ++dim) { 2226 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim); 2227 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim); 2228 } 2229 params.derivs = &derivs; 2230 /* 2231 * could also check all src regs if constant but I doubt such 2232 * cases exist in practice. 2233 */ 2234 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) { 2235 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) { 2236 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2237 } 2238 else { 2239 lod_property = LP_SAMPLER_LOD_PER_QUAD; 2240 } 2241 } 2242 else { 2243 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2244 } 2245 } 2246 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; 2247 2248 /* we don't handle the 4 offset version of tg4 */ 2249 if (inst->Texture.NumOffsets == 1) { 2250 unsigned dim; 2251 sample_key |= LP_SAMPLER_OFFSETS; 2252 for (dim = 0; dim < num_offsets; dim++) { 2253 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); 2254 } 2255 } 2256 2257 params.type = bld->bld_base.base.type; 2258 params.sample_key = sample_key; 2259 params.texture_index = unit; 2260 params.sampler_index = unit; 2261 params.context_ptr = bld->context_ptr; 2262 params.thread_data_ptr = bld->thread_data_ptr; 2263 params.coords = coords; 2264 params.offsets = offsets; 2265 params.lod = lod; 2266 params.texel = texel; 2267 2268 bld->sampler->emit_tex_sample(bld->sampler, 2269 bld->bld_base.base.gallivm, 2270 ¶ms); 2271} 2272 2273static void 2274emit_sample(struct lp_build_tgsi_soa_context *bld, 2275 const struct tgsi_full_instruction *inst, 2276 enum lp_build_tex_modifier modifier, 2277 boolean compare, 2278 enum lp_sampler_op_type sample_type, 2279 LLVMValueRef *texel) 2280{ 2281 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 2282 unsigned texture_unit, sampler_unit; 2283 LLVMValueRef lod = NULL; 2284 LLVMValueRef coords[5]; 2285 LLVMValueRef offsets[3] = { NULL }; 2286 struct lp_derivatives derivs; 2287 struct lp_sampler_params params; 2288 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; 2289 2290 unsigned num_offsets, num_derivs, i; 2291 unsigned layer_coord = 0; 2292 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT; 2293 2294 memset(¶ms, 0, sizeof(params)); 2295 2296 if (!bld->sampler) { 2297 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 2298 for (i = 0; i < 4; i++) { 2299 texel[i] = bld->bld_base.base.undef; 2300 } 2301 return; 2302 } 2303 2304 /* 2305 * unlike old-style tex opcodes the texture/sampler indices 2306 * always come from src1 and src2 respectively. 2307 */ 2308 texture_unit = inst->Src[1].Register.Index; 2309 sampler_unit = inst->Src[2].Register.Index; 2310 2311 /* 2312 * Note inst->Texture.Texture will contain the number of offsets, 2313 * however the target information is NOT there and comes from the 2314 * declared sampler views instead. 2315 */ 2316 switch (bld->sv[texture_unit].Resource) { 2317 case TGSI_TEXTURE_1D: 2318 num_offsets = 1; 2319 num_derivs = 1; 2320 break; 2321 case TGSI_TEXTURE_1D_ARRAY: 2322 layer_coord = 1; 2323 num_offsets = 1; 2324 num_derivs = 1; 2325 break; 2326 case TGSI_TEXTURE_2D: 2327 case TGSI_TEXTURE_RECT: 2328 num_offsets = 2; 2329 num_derivs = 2; 2330 break; 2331 case TGSI_TEXTURE_2D_ARRAY: 2332 layer_coord = 2; 2333 num_offsets = 2; 2334 num_derivs = 2; 2335 break; 2336 case TGSI_TEXTURE_CUBE: 2337 num_offsets = 2; 2338 num_derivs = 3; 2339 break; 2340 case TGSI_TEXTURE_3D: 2341 num_offsets = 3; 2342 num_derivs = 3; 2343 break; 2344 case TGSI_TEXTURE_CUBE_ARRAY: 2345 layer_coord = 3; 2346 num_offsets = 2; 2347 num_derivs = 3; 2348 break; 2349 default: 2350 assert(0); 2351 return; 2352 } 2353 2354 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || 2355 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2356 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0); 2357 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 2358 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT; 2359 } 2360 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2361 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2362 } 2363 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2364 } 2365 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) { 2366 /* XXX might be better to explicitly pass the level zero information */ 2367 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2368 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F); 2369 } 2370 2371 for (i = 0; i < num_derivs; i++) { 2372 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); 2373 } 2374 for (i = num_derivs; i < 5; i++) { 2375 coords[i] = bld->bld_base.base.undef; 2376 } 2377 2378 /* Layer coord always goes into 3rd slot, except for cube map arrays */ 2379 if (layer_coord) { 2380 if (layer_coord == 3) 2381 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2382 else 2383 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2384 } 2385 /* Shadow coord occupies always 5th slot. */ 2386 if (compare) { 2387 sample_key |= LP_SAMPLER_SHADOW; 2388 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0); 2389 } 2390 2391 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 2392 unsigned dim; 2393 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT; 2394 for (dim = 0; dim < num_derivs; ++dim) { 2395 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim); 2396 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim); 2397 } 2398 params.derivs = &derivs; 2399 /* 2400 * could also check all src regs if constant but I doubt such 2401 * cases exist in practice. 2402 */ 2403 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) { 2404 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) { 2405 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2406 } 2407 else { 2408 lod_property = LP_SAMPLER_LOD_PER_QUAD; 2409 } 2410 } 2411 else { 2412 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2413 } 2414 } 2415 2416 /* some advanced gather instructions (txgo) would require 4 offsets */ 2417 if (inst->Texture.NumOffsets == 1) { 2418 unsigned dim; 2419 sample_key |= LP_SAMPLER_OFFSETS; 2420 for (dim = 0; dim < num_offsets; dim++) { 2421 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); 2422 } 2423 } 2424 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; 2425 2426 params.type = bld->bld_base.base.type; 2427 params.sample_key = sample_key; 2428 params.texture_index = texture_unit; 2429 params.sampler_index = sampler_unit; 2430 params.context_ptr = bld->context_ptr; 2431 params.thread_data_ptr = bld->thread_data_ptr; 2432 params.coords = coords; 2433 params.offsets = offsets; 2434 params.lod = lod; 2435 params.texel = texel; 2436 2437 bld->sampler->emit_tex_sample(bld->sampler, 2438 bld->bld_base.base.gallivm, 2439 ¶ms); 2440 2441 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X || 2442 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y || 2443 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z || 2444 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) { 2445 unsigned char swizzles[4]; 2446 swizzles[0] = inst->Src[1].Register.SwizzleX; 2447 swizzles[1] = inst->Src[1].Register.SwizzleY; 2448 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2449 swizzles[3] = inst->Src[1].Register.SwizzleW; 2450 2451 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles); 2452 } 2453} 2454 2455static void 2456emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, 2457 const struct tgsi_full_instruction *inst, 2458 LLVMValueRef *texel, 2459 boolean is_samplei) 2460{ 2461 unsigned unit, target; 2462 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); 2463 LLVMValueRef explicit_lod = NULL; 2464 LLVMValueRef coords[5]; 2465 LLVMValueRef offsets[3] = { NULL }; 2466 LLVMValueRef ms_index = NULL; 2467 struct lp_sampler_params params; 2468 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; 2469 unsigned dims, i; 2470 unsigned layer_coord = 0; 2471 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT; 2472 2473 memset(¶ms, 0, sizeof(params)); 2474 2475 if (!bld->sampler) { 2476 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 2477 for (i = 0; i < 4; i++) { 2478 texel[i] = coord_undef; 2479 } 2480 return; 2481 } 2482 2483 unit = inst->Src[1].Register.Index; 2484 2485 if (is_samplei) { 2486 target = bld->sv[unit].Resource; 2487 } 2488 else { 2489 target = inst->Texture.Texture; 2490 } 2491 2492 switch (target) { 2493 case TGSI_TEXTURE_1D: 2494 case TGSI_TEXTURE_BUFFER: 2495 dims = 1; 2496 break; 2497 case TGSI_TEXTURE_1D_ARRAY: 2498 layer_coord = 1; 2499 dims = 1; 2500 break; 2501 case TGSI_TEXTURE_2D: 2502 case TGSI_TEXTURE_RECT: 2503 case TGSI_TEXTURE_2D_MSAA: 2504 dims = 2; 2505 break; 2506 case TGSI_TEXTURE_2D_ARRAY: 2507 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2508 layer_coord = 2; 2509 dims = 2; 2510 break; 2511 case TGSI_TEXTURE_3D: 2512 dims = 3; 2513 break; 2514 default: 2515 assert(0); 2516 return; 2517 } 2518 2519 /* always have lod except for buffers and msaa targets ? */ 2520 if (target != TGSI_TEXTURE_BUFFER && 2521 target != TGSI_TEXTURE_2D_MSAA && 2522 target != TGSI_TEXTURE_2D_ARRAY_MSAA && 2523 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) { 2524 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2525 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); 2526 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2527 } 2528 2529 if (target == TGSI_TEXTURE_2D_MSAA || 2530 target == TGSI_TEXTURE_2D_ARRAY_MSAA) { 2531 sample_key |= LP_SAMPLER_FETCH_MS; 2532 ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); 2533 } 2534 2535 /* 2536 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms) 2537 * would be the sample index. 2538 */ 2539 2540 for (i = 0; i < dims; i++) { 2541 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); 2542 } 2543 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */ 2544 for (i = dims; i < 5; i++) { 2545 coords[i] = coord_undef; 2546 } 2547 if (layer_coord) 2548 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2549 2550 if (inst->Texture.NumOffsets == 1) { 2551 unsigned dim; 2552 sample_key |= LP_SAMPLER_OFFSETS; 2553 for (dim = 0; dim < dims; dim++) { 2554 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); 2555 } 2556 } 2557 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; 2558 2559 params.type = bld->bld_base.base.type; 2560 params.sample_key = sample_key; 2561 params.texture_index = unit; 2562 /* 2563 * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS 2564 * and trigger some assertions with d3d10 where the sampler view number 2565 * can exceed this. 2566 */ 2567 params.sampler_index = 0; 2568 params.context_ptr = bld->context_ptr; 2569 params.thread_data_ptr = bld->thread_data_ptr; 2570 params.coords = coords; 2571 params.offsets = offsets; 2572 params.derivs = NULL; 2573 params.lod = explicit_lod; 2574 params.texel = texel; 2575 params.ms_index = ms_index; 2576 2577 bld->sampler->emit_tex_sample(bld->sampler, 2578 bld->bld_base.base.gallivm, 2579 ¶ms); 2580 2581 if (is_samplei && 2582 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X || 2583 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y || 2584 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z || 2585 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) { 2586 unsigned char swizzles[4]; 2587 swizzles[0] = inst->Src[1].Register.SwizzleX; 2588 swizzles[1] = inst->Src[1].Register.SwizzleY; 2589 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2590 swizzles[3] = inst->Src[1].Register.SwizzleW; 2591 2592 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles); 2593 } 2594} 2595 2596static void 2597emit_size_query( struct lp_build_tgsi_soa_context *bld, 2598 const struct tgsi_full_instruction *inst, 2599 LLVMValueRef *sizes_out, 2600 boolean is_sviewinfo) 2601{ 2602 LLVMValueRef explicit_lod; 2603 enum lp_sampler_lod_property lod_property; 2604 unsigned has_lod; 2605 unsigned i; 2606 unsigned unit = inst->Src[1].Register.Index; 2607 unsigned target, pipe_target; 2608 struct lp_sampler_size_query_params params; 2609 2610 if (is_sviewinfo) { 2611 target = bld->sv[unit].Resource; 2612 } 2613 else { 2614 target = inst->Texture.Texture; 2615 } 2616 switch (target) { 2617 case TGSI_TEXTURE_BUFFER: 2618 case TGSI_TEXTURE_RECT: 2619 case TGSI_TEXTURE_SHADOWRECT: 2620 has_lod = 0; 2621 break; 2622 default: 2623 has_lod = 1; 2624 break; 2625 } 2626 2627 if (!bld->sampler) { 2628 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n"); 2629 for (i = 0; i < 4; i++) 2630 sizes_out[i] = bld->bld_base.int_bld.undef; 2631 return; 2632 } 2633 2634 if (has_lod) { 2635 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0); 2636 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2637 } 2638 else { 2639 explicit_lod = NULL; 2640 lod_property = LP_SAMPLER_LOD_SCALAR; 2641 } 2642 2643 2644 pipe_target = tgsi_to_pipe_tex_target(target); 2645 2646 params.int_type = bld->bld_base.int_bld.type; 2647 params.texture_unit = unit; 2648 params.target = pipe_target; 2649 params.context_ptr = bld->context_ptr; 2650 params.is_sviewinfo = TRUE; 2651 params.lod_property = lod_property; 2652 params.explicit_lod = explicit_lod; 2653 params.sizes_out = sizes_out; 2654 params.samples_only = false; 2655 2656 bld->sampler->emit_size_query(bld->sampler, 2657 bld->bld_base.base.gallivm, 2658 ¶ms); 2659} 2660 2661static boolean 2662near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 2663 int pc) 2664{ 2665 unsigned i; 2666 2667 for (i = 0; i < 5; i++) { 2668 enum tgsi_opcode opcode; 2669 2670 if (pc + i >= bld->bld_base.info->num_instructions) 2671 return TRUE; 2672 2673 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode; 2674 2675 if (opcode == TGSI_OPCODE_END) 2676 return TRUE; 2677 2678 if (opcode == TGSI_OPCODE_TEX || 2679 opcode == TGSI_OPCODE_TXP || 2680 opcode == TGSI_OPCODE_TXD || 2681 opcode == TGSI_OPCODE_TXB || 2682 opcode == TGSI_OPCODE_TXL || 2683 opcode == TGSI_OPCODE_TXF || 2684 opcode == TGSI_OPCODE_TXQ || 2685 opcode == TGSI_OPCODE_TEX2 || 2686 opcode == TGSI_OPCODE_TXB2 || 2687 opcode == TGSI_OPCODE_TXL2 || 2688 opcode == TGSI_OPCODE_SAMPLE || 2689 opcode == TGSI_OPCODE_SAMPLE_B || 2690 opcode == TGSI_OPCODE_SAMPLE_C || 2691 opcode == TGSI_OPCODE_SAMPLE_C_LZ || 2692 opcode == TGSI_OPCODE_SAMPLE_D || 2693 opcode == TGSI_OPCODE_SAMPLE_I || 2694 opcode == TGSI_OPCODE_SAMPLE_I_MS || 2695 opcode == TGSI_OPCODE_SAMPLE_L || 2696 opcode == TGSI_OPCODE_SVIEWINFO || 2697 opcode == TGSI_OPCODE_CAL || 2698 opcode == TGSI_OPCODE_IF || 2699 opcode == TGSI_OPCODE_UIF || 2700 opcode == TGSI_OPCODE_BGNLOOP || 2701 opcode == TGSI_OPCODE_SWITCH) 2702 return FALSE; 2703 } 2704 2705 return TRUE; 2706} 2707 2708 2709 2710/** 2711 * Kill fragment if any of the src register values are negative. 2712 */ 2713static void 2714emit_kill_if( 2715 struct lp_build_tgsi_soa_context *bld, 2716 const struct tgsi_full_instruction *inst, 2717 int pc) 2718{ 2719 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 2720 const struct tgsi_full_src_register *reg = &inst->Src[0]; 2721 LLVMValueRef terms[TGSI_NUM_CHANNELS]; 2722 LLVMValueRef mask; 2723 unsigned chan_index; 2724 2725 memset(&terms, 0, sizeof terms); 2726 2727 TGSI_FOR_EACH_CHANNEL( chan_index ) { 2728 unsigned swizzle; 2729 2730 /* Unswizzle channel */ 2731 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 2732 2733 /* Check if the component has not been already tested. */ 2734 assert(swizzle < TGSI_NUM_CHANNELS); 2735 if( !terms[swizzle] ) 2736 /* TODO: change the comparison operator instead of setting the sign */ 2737 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index ); 2738 } 2739 2740 mask = NULL; 2741 TGSI_FOR_EACH_CHANNEL( chan_index ) { 2742 if(terms[chan_index]) { 2743 LLVMValueRef chan_mask; 2744 2745 /* 2746 * If term < 0 then mask = 0 else mask = ~0. 2747 */ 2748 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); 2749 2750 if(mask) 2751 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 2752 else 2753 mask = chan_mask; 2754 } 2755 } 2756 2757 if (bld->exec_mask.has_mask) { 2758 LLVMValueRef invmask; 2759 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 2760 mask = LLVMBuildOr(builder, mask, invmask, ""); 2761 } 2762 2763 lp_build_mask_update(bld->mask, mask); 2764 if (!near_end_of_shader(bld, pc)) 2765 lp_build_mask_check(bld->mask); 2766} 2767 2768 2769/** 2770 * Unconditional fragment kill. 2771 * The only predication is the execution mask which will apply if 2772 * we're inside a loop or conditional. 2773 */ 2774static void 2775emit_kill(struct lp_build_tgsi_soa_context *bld, 2776 int pc) 2777{ 2778 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 2779 LLVMValueRef mask; 2780 2781 /* For those channels which are "alive", disable fragment shader 2782 * execution. 2783 */ 2784 if (bld->exec_mask.has_mask) { 2785 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 2786 } 2787 else { 2788 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type); 2789 mask = zero; 2790 } 2791 2792 lp_build_mask_update(bld->mask, mask); 2793 2794 if (!near_end_of_shader(bld, pc)) 2795 lp_build_mask_check(bld->mask); 2796} 2797 2798 2799/** 2800 * Emit code which will dump the value of all the temporary registers 2801 * to stdout. 2802 */ 2803static void 2804emit_dump_file(struct lp_build_tgsi_soa_context *bld, 2805 unsigned file) 2806{ 2807 const struct tgsi_shader_info *info = bld->bld_base.info; 2808 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 2809 LLVMBuilderRef builder = gallivm->builder; 2810 LLVMValueRef reg_ptr; 2811 int index; 2812 int max_index = info->file_max[file]; 2813 2814 /* 2815 * Some register files, particularly constants, can be very large, 2816 * and dumping everything could make this unusably slow. 2817 */ 2818 max_index = MIN2(max_index, 32); 2819 2820 for (index = 0; index <= max_index; index++) { 2821 LLVMValueRef res; 2822 unsigned mask; 2823 int chan; 2824 2825 if (index < 8 * sizeof(unsigned) && 2826 (info->file_mask[file] & (1u << index)) == 0) { 2827 /* This was not declared.*/ 2828 continue; 2829 } 2830 2831 if (file == TGSI_FILE_INPUT) { 2832 mask = info->input_usage_mask[index]; 2833 } else { 2834 mask = TGSI_WRITEMASK_XYZW; 2835 } 2836 2837 for (chan = 0; chan < 4; chan++) { 2838 if ((mask & (1 << chan)) == 0) { 2839 /* This channel is not used.*/ 2840 continue; 2841 } 2842 2843 if (file == TGSI_FILE_CONSTANT) { 2844 struct tgsi_full_src_register reg; 2845 memset(®, 0, sizeof reg); 2846 reg.Register.File = file; 2847 reg.Register.Index = index; 2848 reg.Register.SwizzleX = 0; 2849 reg.Register.SwizzleY = 1; 2850 reg.Register.SwizzleZ = 2; 2851 reg.Register.SwizzleW = 3; 2852 2853 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, ®, TGSI_TYPE_FLOAT, chan); 2854 if (!res) { 2855 continue; 2856 } 2857 } else if (file == TGSI_FILE_INPUT) { 2858 res = bld->inputs[index][chan]; 2859 if (!res) { 2860 continue; 2861 } 2862 } else if (file == TGSI_FILE_TEMPORARY) { 2863 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan); 2864 assert(reg_ptr); 2865 res = LLVMBuildLoad(builder, reg_ptr, ""); 2866 } else if (file == TGSI_FILE_OUTPUT) { 2867 reg_ptr = lp_get_output_ptr(bld, index, chan); 2868 assert(reg_ptr); 2869 res = LLVMBuildLoad(builder, reg_ptr, ""); 2870 } else { 2871 assert(0); 2872 continue; 2873 } 2874 2875 emit_dump_reg(gallivm, file, index, chan, res); 2876 } 2877 } 2878} 2879 2880 2881 2882void 2883lp_emit_declaration_soa( 2884 struct lp_build_tgsi_context *bld_base, 2885 const struct tgsi_full_declaration *decl) 2886{ 2887 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 2888 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 2889 LLVMTypeRef vec_type = bld->bld_base.base.vec_type; 2890 const unsigned first = decl->Range.First; 2891 const unsigned last = decl->Range.Last; 2892 unsigned idx, i; 2893 2894 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); 2895 2896 switch (decl->Declaration.File) { 2897 case TGSI_FILE_TEMPORARY: 2898 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 2899 assert(last < LP_MAX_INLINED_TEMPS); 2900 for (idx = first; idx <= last; ++idx) { 2901 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 2902 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 2903 } 2904 } 2905 break; 2906 2907 case TGSI_FILE_OUTPUT: 2908 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 2909 for (idx = first; idx <= last; ++idx) { 2910 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 2911 bld->outputs[idx][i] = lp_build_alloca(gallivm, 2912 vec_type, "output"); 2913 } 2914 } 2915 break; 2916 2917 case TGSI_FILE_ADDRESS: 2918 /* ADDR registers are only allocated with an integer LLVM IR type, 2919 * as they are guaranteed to always have integers. 2920 * XXX: Not sure if this exception is worthwhile (or the whole idea of 2921 * an ADDR register for that matter). 2922 */ 2923 assert(last < LP_MAX_TGSI_ADDRS); 2924 for (idx = first; idx <= last; ++idx) { 2925 assert(idx < LP_MAX_TGSI_ADDRS); 2926 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 2927 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr"); 2928 } 2929 break; 2930 2931 case TGSI_FILE_SAMPLER_VIEW: 2932 /* 2933 * The target stored here MUST match whatever there actually 2934 * is in the set sampler views (what about return type?). 2935 */ 2936 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); 2937 for (idx = first; idx <= last; ++idx) { 2938 bld->sv[idx] = decl->SamplerView; 2939 } 2940 break; 2941 2942 case TGSI_FILE_CONSTANT: 2943 { 2944 /* 2945 * We could trivially fetch the per-buffer pointer when fetching the 2946 * constant, relying on llvm to figure out it's always the same pointer 2947 * anyway. However, doing so results in a huge (more than factor of 10) 2948 * slowdown in llvm compilation times for some (but not all) shaders 2949 * (more specifically, the IR optimization spends way more time in 2950 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3. 2951 */ 2952 unsigned idx2D = decl->Dim.Index2D; 2953 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D); 2954 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS); 2955 bld->consts[idx2D] = 2956 lp_build_array_get(gallivm, bld->consts_ptr, index2D); 2957 bld->consts_sizes[idx2D] = 2958 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D); 2959 } 2960 break; 2961 case TGSI_FILE_BUFFER: 2962 { 2963 unsigned idx = decl->Range.First; 2964 LLVMValueRef index = lp_build_const_int32(gallivm, idx); 2965 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS); 2966 bld->ssbos[idx] = 2967 lp_build_array_get(gallivm, bld->ssbo_ptr, index); 2968 bld->ssbo_sizes[idx] = 2969 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index); 2970 2971 } 2972 break; 2973 case TGSI_FILE_MEMORY: 2974 break; 2975 default: 2976 /* don't need to declare other vars */ 2977 break; 2978 } 2979} 2980 2981 2982void lp_emit_immediate_soa( 2983 struct lp_build_tgsi_context *bld_base, 2984 const struct tgsi_full_immediate *imm) 2985{ 2986 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 2987 struct gallivm_state * gallivm = bld_base->base.gallivm; 2988 LLVMValueRef imms[4]; 2989 unsigned i; 2990 const uint size = imm->Immediate.NrTokens - 1; 2991 assert(size <= 4); 2992 switch (imm->Immediate.DataType) { 2993 case TGSI_IMM_FLOAT32: 2994 for( i = 0; i < size; ++i ) 2995 imms[i] = 2996 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); 2997 2998 break; 2999 case TGSI_IMM_FLOAT64: 3000 case TGSI_IMM_UINT64: 3001 case TGSI_IMM_INT64: 3002 case TGSI_IMM_UINT32: 3003 for( i = 0; i < size; ++i ) { 3004 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); 3005 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); 3006 } 3007 3008 break; 3009 case TGSI_IMM_INT32: 3010 for( i = 0; i < size; ++i ) { 3011 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int); 3012 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); 3013 } 3014 3015 break; 3016 } 3017 for( i = size; i < 4; ++i ) 3018 imms[i] = bld_base->base.undef; 3019 3020 if (bld->use_immediates_array) { 3021 unsigned index = bld->num_immediates; 3022 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 3023 LLVMBuilderRef builder = gallivm->builder; 3024 LLVMValueRef gep[2]; 3025 gep[0] = lp_build_const_int32(gallivm, 0); 3026 3027 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)); 3028 for (i = 0; i < 4; ++i ) { 3029 gep[1] = lp_build_const_int32(gallivm, index * 4 + i); 3030 LLVMValueRef imm_ptr = LLVMBuildGEP(builder, 3031 bld->imms_array, gep, 2, ""); 3032 LLVMBuildStore(builder, imms[i], imm_ptr); 3033 } 3034 } else { 3035 /* simply copy the immediate values into the next immediates[] slot */ 3036 unsigned i; 3037 assert(imm->Immediate.NrTokens - 1 <= 4); 3038 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES); 3039 3040 for(i = 0; i < 4; ++i ) 3041 bld->immediates[bld->num_immediates][i] = imms[i]; 3042 3043 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) { 3044 unsigned index = bld->num_immediates; 3045 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 3046 LLVMBuilderRef builder = gallivm->builder; 3047 LLVMValueRef gep[2]; 3048 gep[0] = lp_build_const_int32(gallivm, 0); 3049 for (i = 0; i < 4; ++i ) { 3050 gep[1] = lp_build_const_int32(gallivm, index * 4 + i); 3051 LLVMValueRef imm_ptr = LLVMBuildGEP(builder, 3052 bld->imms_array, gep, 2, ""); 3053 LLVMBuildStore(builder, 3054 bld->immediates[index][i], 3055 imm_ptr); 3056 } 3057 } 3058 } 3059 3060 bld->num_immediates++; 3061} 3062 3063static void 3064ddx_emit( 3065 const struct lp_build_tgsi_action * action, 3066 struct lp_build_tgsi_context * bld_base, 3067 struct lp_build_emit_data * emit_data) 3068{ 3069 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3070 3071 emit_fetch_deriv(bld, emit_data->args[0], NULL, 3072 &emit_data->output[emit_data->chan], NULL); 3073} 3074 3075static void 3076ddy_emit( 3077 const struct lp_build_tgsi_action * action, 3078 struct lp_build_tgsi_context * bld_base, 3079 struct lp_build_emit_data * emit_data) 3080{ 3081 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3082 3083 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL, 3084 &emit_data->output[emit_data->chan]); 3085} 3086 3087static void 3088kill_emit( 3089 const struct lp_build_tgsi_action * action, 3090 struct lp_build_tgsi_context * bld_base, 3091 struct lp_build_emit_data * emit_data) 3092{ 3093 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3094 3095 emit_kill(bld, bld_base->pc - 1); 3096} 3097 3098static void 3099kill_if_emit( 3100 const struct lp_build_tgsi_action * action, 3101 struct lp_build_tgsi_context * bld_base, 3102 struct lp_build_emit_data * emit_data) 3103{ 3104 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3105 3106 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1); 3107} 3108 3109static void 3110tex_emit( 3111 const struct lp_build_tgsi_action * action, 3112 struct lp_build_tgsi_context * bld_base, 3113 struct lp_build_emit_data * emit_data) 3114{ 3115 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3116 3117 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3118 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3119} 3120 3121static void 3122tex2_emit( 3123 const struct lp_build_tgsi_action * action, 3124 struct lp_build_tgsi_context * bld_base, 3125 struct lp_build_emit_data * emit_data) 3126{ 3127 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3128 3129 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3130 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); 3131} 3132 3133static void 3134txb_emit( 3135 const struct lp_build_tgsi_action * action, 3136 struct lp_build_tgsi_context * bld_base, 3137 struct lp_build_emit_data * emit_data) 3138{ 3139 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3140 3141 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 3142 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3143} 3144 3145static void 3146txb2_emit( 3147 const struct lp_build_tgsi_action * action, 3148 struct lp_build_tgsi_context * bld_base, 3149 struct lp_build_emit_data * emit_data) 3150{ 3151 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3152 3153 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 3154 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); 3155} 3156 3157static void 3158txd_emit( 3159 const struct lp_build_tgsi_action * action, 3160 struct lp_build_tgsi_context * bld_base, 3161 struct lp_build_emit_data * emit_data) 3162{ 3163 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3164 3165 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 3166 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE); 3167} 3168 3169static void 3170txl_emit( 3171 const struct lp_build_tgsi_action * action, 3172 struct lp_build_tgsi_context * bld_base, 3173 struct lp_build_emit_data * emit_data) 3174{ 3175 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3176 3177 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 3178 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3179} 3180 3181static void 3182txl2_emit( 3183 const struct lp_build_tgsi_action * action, 3184 struct lp_build_tgsi_context * bld_base, 3185 struct lp_build_emit_data * emit_data) 3186{ 3187 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3188 3189 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 3190 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); 3191} 3192 3193static void 3194txp_emit( 3195 const struct lp_build_tgsi_action * action, 3196 struct lp_build_tgsi_context * bld_base, 3197 struct lp_build_emit_data * emit_data) 3198{ 3199 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3200 3201 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED, 3202 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); 3203} 3204 3205static void 3206tg4_emit( 3207 const struct lp_build_tgsi_action * action, 3208 struct lp_build_tgsi_context * bld_base, 3209 struct lp_build_emit_data * emit_data) 3210{ 3211 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3212 3213 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3214 emit_data->output, 2, LP_SAMPLER_OP_GATHER); 3215} 3216 3217static void 3218lodq_emit( 3219 const struct lp_build_tgsi_action * action, 3220 struct lp_build_tgsi_context * bld_base, 3221 struct lp_build_emit_data * emit_data) 3222{ 3223 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3224 3225 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3226 emit_data->output, 1, LP_SAMPLER_OP_LODQ); 3227} 3228 3229static void 3230txq_emit( 3231 const struct lp_build_tgsi_action * action, 3232 struct lp_build_tgsi_context * bld_base, 3233 struct lp_build_emit_data * emit_data) 3234{ 3235 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3236 3237 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE); 3238} 3239 3240static void 3241txf_emit( 3242 const struct lp_build_tgsi_action * action, 3243 struct lp_build_tgsi_context * bld_base, 3244 struct lp_build_emit_data * emit_data) 3245{ 3246 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3247 3248 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE); 3249} 3250 3251static void 3252sample_i_emit( 3253 const struct lp_build_tgsi_action * action, 3254 struct lp_build_tgsi_context * bld_base, 3255 struct lp_build_emit_data * emit_data) 3256{ 3257 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3258 3259 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE); 3260} 3261 3262static void 3263sample_emit( 3264 const struct lp_build_tgsi_action * action, 3265 struct lp_build_tgsi_context * bld_base, 3266 struct lp_build_emit_data * emit_data) 3267{ 3268 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3269 3270 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3271 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3272} 3273 3274static void 3275sample_b_emit( 3276 const struct lp_build_tgsi_action * action, 3277 struct lp_build_tgsi_context * bld_base, 3278 struct lp_build_emit_data * emit_data) 3279{ 3280 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3281 3282 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 3283 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3284} 3285 3286static void 3287sample_c_emit( 3288 const struct lp_build_tgsi_action * action, 3289 struct lp_build_tgsi_context * bld_base, 3290 struct lp_build_emit_data * emit_data) 3291{ 3292 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3293 3294 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3295 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3296} 3297 3298static void 3299sample_c_lz_emit( 3300 const struct lp_build_tgsi_action * action, 3301 struct lp_build_tgsi_context * bld_base, 3302 struct lp_build_emit_data * emit_data) 3303{ 3304 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3305 3306 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO, 3307 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3308} 3309 3310static void 3311sample_d_emit( 3312 const struct lp_build_tgsi_action * action, 3313 struct lp_build_tgsi_context * bld_base, 3314 struct lp_build_emit_data * emit_data) 3315{ 3316 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3317 3318 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 3319 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3320} 3321 3322static void 3323sample_l_emit( 3324 const struct lp_build_tgsi_action * action, 3325 struct lp_build_tgsi_context * bld_base, 3326 struct lp_build_emit_data * emit_data) 3327{ 3328 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3329 3330 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 3331 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3332} 3333 3334static void 3335gather4_emit( 3336 const struct lp_build_tgsi_action * action, 3337 struct lp_build_tgsi_context * bld_base, 3338 struct lp_build_emit_data * emit_data) 3339{ 3340 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3341 3342 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3343 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output); 3344} 3345 3346static void 3347sviewinfo_emit( 3348 const struct lp_build_tgsi_action * action, 3349 struct lp_build_tgsi_context * bld_base, 3350 struct lp_build_emit_data * emit_data) 3351{ 3352 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3353 3354 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); 3355} 3356 3357static void 3358lod_emit( 3359 const struct lp_build_tgsi_action * action, 3360 struct lp_build_tgsi_context * bld_base, 3361 struct lp_build_emit_data * emit_data) 3362{ 3363 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3364 3365 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3366 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output); 3367} 3368 3369static void target_to_dims_layer(unsigned target, 3370 unsigned *dims, 3371 unsigned *layer_coord) 3372{ 3373 *layer_coord = 0; 3374 switch (target) { 3375 case TGSI_TEXTURE_1D: 3376 case TGSI_TEXTURE_BUFFER: 3377 *dims = 1; 3378 break; 3379 case TGSI_TEXTURE_1D_ARRAY: 3380 *layer_coord = 1; 3381 *dims = 1; 3382 break; 3383 case TGSI_TEXTURE_2D: 3384 case TGSI_TEXTURE_RECT: 3385 *dims = 2; 3386 break; 3387 case TGSI_TEXTURE_2D_ARRAY: 3388 *layer_coord = 2; 3389 *dims = 2; 3390 break; 3391 case TGSI_TEXTURE_3D: 3392 case TGSI_TEXTURE_CUBE: 3393 case TGSI_TEXTURE_CUBE_ARRAY: 3394 *dims = 3; 3395 break; 3396 default: 3397 assert(0); 3398 *dims = 0; 3399 return; 3400 } 3401} 3402 3403static void 3404img_load_emit( 3405 const struct lp_build_tgsi_action * action, 3406 struct lp_build_tgsi_context * bld_base, 3407 struct lp_build_emit_data * emit_data) 3408{ 3409 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3410 struct lp_img_params params; 3411 LLVMValueRef coords[5]; 3412 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); 3413 unsigned dims; 3414 unsigned target = emit_data->inst->Memory.Texture; 3415 unsigned layer_coord; 3416 3417 target_to_dims_layer(target, &dims, &layer_coord); 3418 3419 for (unsigned i = 0; i < dims; i++) { 3420 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i); 3421 } 3422 for (unsigned i = dims; i < 5; i++) { 3423 coords[i] = coord_undef; 3424 } 3425 if (layer_coord) 3426 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord); 3427 3428 memset(¶ms, 0, sizeof(params)); 3429 3430 params.type = bld->bld_base.base.type; 3431 params.context_ptr = bld->context_ptr; 3432 params.thread_data_ptr = bld->thread_data_ptr; 3433 params.coords = coords; 3434 params.outdata = emit_data->output; 3435 params.target = tgsi_to_pipe_tex_target(target); 3436 params.image_index = emit_data->inst->Src[0].Register.Index; 3437 params.img_op = LP_IMG_LOAD; 3438 bld->image->emit_op(bld->image, 3439 bld->bld_base.base.gallivm, 3440 ¶ms); 3441} 3442 3443static void 3444load_emit( 3445 const struct lp_build_tgsi_action * action, 3446 struct lp_build_tgsi_context * bld_base, 3447 struct lp_build_emit_data * emit_data) 3448{ 3449 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3450 struct gallivm_state * gallivm = bld_base->base.gallivm; 3451 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 3452 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; 3453 unsigned buf = bufreg->Register.Index; 3454 assert(bufreg->Register.File == TGSI_FILE_BUFFER || 3455 bufreg->Register.File == TGSI_FILE_IMAGE || 3456 bufreg->Register.File == TGSI_FILE_MEMORY || 3457 bufreg->Register.File == TGSI_FILE_CONSTBUF); 3458 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY; 3459 struct lp_build_context *uint_bld = &bld_base->uint_bld; 3460 3461 if (bufreg->Register.File == TGSI_FILE_IMAGE) { 3462 img_load_emit(action, bld_base, emit_data); 3463 } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) { 3464 LLVMValueRef consts_ptr = bld->consts[buf]; 3465 LLVMValueRef num_consts = bld->consts_sizes[buf]; 3466 3467 LLVMValueRef indirect_index; 3468 LLVMValueRef overflow_mask; 3469 3470 indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0); 3471 indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4); 3472 3473 /* All fetches are from the same constant buffer, so 3474 * we need to propagate the size to a vector to do a 3475 * vector comparison */ 3476 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); 3477 3478 /* Gather values from the constant buffer */ 3479 unsigned chan_index; 3480 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) { 3481 /* Construct a boolean vector telling us which channels 3482 * overflow the bound constant buffer */ 3483 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, 3484 indirect_index, num_consts); 3485 3486 /* index_vec = indirect_index * 4 */ 3487 LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 3488 index_vec = lp_build_add(uint_bld, index_vec, 3489 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index)); 3490 3491 emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL); 3492 } 3493 } else if (0) { 3494 /* for indirect support with ARB_gpu_shader5 */ 3495 } else { 3496 LLVMValueRef index; 3497 LLVMValueRef scalar, scalar_ptr; 3498 unsigned chan_index; 3499 3500 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0); 3501 index = lp_build_shr_imm(uint_bld, index, 2); 3502 3503 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf]; 3504 3505 LLVMValueRef ssbo_limit = NULL; 3506 3507 if (!is_shared) { 3508 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); 3509 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); 3510 } 3511 3512 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) { 3513 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index)); 3514 3515 LLVMValueRef exec_mask = mask_vec(bld_base); 3516 if (!is_shared) { 3517 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); 3518 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); 3519 } 3520 3521 LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, ""); 3522 struct lp_build_loop_state loop_state; 3523 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); 3524 3525 struct lp_build_if_state ifthen; 3526 LLVMValueRef cond, temp_res; 3527 3528 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, 3529 loop_state.counter, ""); 3530 3531 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); 3532 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); 3533 3534 lp_build_if(&ifthen, gallivm, cond); 3535 scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index); 3536 3537 temp_res = LLVMBuildLoad(builder, result, ""); 3538 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); 3539 LLVMBuildStore(builder, temp_res, result); 3540 lp_build_else(&ifthen); 3541 temp_res = LLVMBuildLoad(builder, result, ""); 3542 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, ""); 3543 LLVMBuildStore(builder, temp_res, result); 3544 lp_build_endif(&ifthen); 3545 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), 3546 NULL, LLVMIntUGE); 3547 emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, ""); 3548 } 3549 } 3550} 3551 3552static void 3553img_store_emit( 3554 const struct lp_build_tgsi_action * action, 3555 struct lp_build_tgsi_context * bld_base, 3556 struct lp_build_emit_data * emit_data) 3557{ 3558 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3559 struct lp_img_params params; 3560 LLVMValueRef coords[5]; 3561 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); 3562 unsigned dims; 3563 unsigned target = emit_data->inst->Memory.Texture; 3564 unsigned layer_coord; 3565 3566 target_to_dims_layer(target, &dims, &layer_coord); 3567 for (unsigned i = 0; i < dims; i++) { 3568 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i); 3569 } 3570 for (unsigned i = dims; i < 5; i++) { 3571 coords[i] = coord_undef; 3572 } 3573 if (layer_coord) 3574 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord); 3575 memset(¶ms, 0, sizeof(params)); 3576 3577 params.type = bld->bld_base.base.type; 3578 params.context_ptr = bld->context_ptr; 3579 params.thread_data_ptr = bld->thread_data_ptr; 3580 params.coords = coords; 3581 params.outdata = NULL; 3582 params.exec_mask = mask_vec(bld_base); 3583 params.target = tgsi_to_pipe_tex_target(target); 3584 params.image_index = emit_data->inst->Dst[0].Register.Index; 3585 params.img_op = LP_IMG_STORE; 3586 for (unsigned i = 0; i < 4; i++) 3587 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i); 3588 3589 bld->image->emit_op(bld->image, 3590 bld->bld_base.base.gallivm, 3591 ¶ms); 3592} 3593 3594static void 3595store_emit( 3596 const struct lp_build_tgsi_action * action, 3597 struct lp_build_tgsi_context * bld_base, 3598 struct lp_build_emit_data * emit_data) 3599{ 3600 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3601 struct gallivm_state * gallivm = bld_base->base.gallivm; 3602 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 3603 struct lp_build_context *uint_bld = &bld_base->uint_bld; 3604 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0]; 3605 unsigned buf = bufreg->Register.Index; 3606 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY); 3607 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY; 3608 3609 if (bufreg->Register.File == TGSI_FILE_IMAGE) { 3610 img_store_emit(action, bld_base, emit_data); 3611 } else if (0) { 3612 3613 } else { 3614 LLVMValueRef index; /* index into the const buffer */ 3615 LLVMValueRef scalar_ptr; 3616 LLVMValueRef value; 3617 unsigned chan_index; 3618 3619 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0); 3620 index = lp_build_shr_imm(uint_bld, index, 2); 3621 3622 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf]; 3623 3624 LLVMValueRef ssbo_limit = NULL; 3625 3626 if (!is_shared) { 3627 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); 3628 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); 3629 } 3630 3631 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) { 3632 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index)); 3633 3634 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index); 3635 3636 LLVMValueRef exec_mask = mask_vec(bld_base); 3637 if (!is_shared) { 3638 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); 3639 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); 3640 } 3641 3642 struct lp_build_loop_state loop_state; 3643 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); 3644 3645 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value, 3646 loop_state.counter, ""); 3647 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, ""); 3648 3649 struct lp_build_if_state ifthen; 3650 LLVMValueRef cond; 3651 3652 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, 3653 loop_state.counter, ""); 3654 3655 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); 3656 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); 3657 lp_build_if(&ifthen, gallivm, cond); 3658 3659 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr); 3660 3661 lp_build_endif(&ifthen); 3662 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), 3663 NULL, LLVMIntUGE); 3664 } 3665 } 3666} 3667 3668static void 3669resq_emit( 3670 const struct lp_build_tgsi_action * action, 3671 struct lp_build_tgsi_context * bld_base, 3672 struct lp_build_emit_data * emit_data) 3673{ 3674 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3675 struct lp_build_context *uint_bld = &bld_base->uint_bld; 3676 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; 3677 3678 unsigned buf = bufreg->Register.Index; 3679 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); 3680 3681 if (bufreg->Register.File == TGSI_FILE_IMAGE) { 3682 unsigned target = emit_data->inst->Memory.Texture; 3683 struct lp_sampler_size_query_params params = { 0 }; 3684 params.int_type = bld->bld_base.int_bld.type; 3685 params.texture_unit = buf; 3686 params.target = tgsi_to_pipe_tex_target(target); 3687 params.context_ptr = bld->context_ptr; 3688 params.sizes_out = emit_data->output; 3689 3690 bld->image->emit_size_query(bld->image, 3691 bld->bld_base.base.gallivm, 3692 ¶ms); 3693 } else { 3694 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf]; 3695 3696 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo); 3697 } 3698} 3699 3700static void 3701img_atomic_emit( 3702 const struct lp_build_tgsi_action * action, 3703 struct lp_build_tgsi_context * bld_base, 3704 struct lp_build_emit_data * emit_data, 3705 LLVMAtomicRMWBinOp op) 3706{ 3707 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3708 struct lp_img_params params; 3709 LLVMValueRef coords[5]; 3710 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); 3711 unsigned dims; 3712 unsigned layer_coord; 3713 unsigned target = emit_data->inst->Memory.Texture; 3714 3715 target_to_dims_layer(target, &dims, &layer_coord); 3716 3717 for (unsigned i = 0; i < dims; i++) { 3718 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i); 3719 } 3720 for (unsigned i = dims; i < 5; i++) { 3721 coords[i] = coord_undef; 3722 } 3723 if (layer_coord) 3724 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord); 3725 memset(¶ms, 0, sizeof(params)); 3726 3727 params.type = bld->bld_base.base.type; 3728 params.context_ptr = bld->context_ptr; 3729 params.thread_data_ptr = bld->thread_data_ptr; 3730 params.exec_mask = mask_vec(bld_base); 3731 params.image_index = emit_data->inst->Src[0].Register.Index; 3732 params.coords = coords; 3733 params.target = tgsi_to_pipe_tex_target(target); 3734 params.op = op; 3735 params.outdata = emit_data->output; 3736 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC; 3737 3738 for (unsigned i = 0; i < 4; i++) 3739 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i); 3740 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 3741 for (unsigned i = 0; i < 4; i++) 3742 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i); 3743 } 3744 bld->image->emit_op(bld->image, 3745 bld->bld_base.base.gallivm, 3746 ¶ms); 3747} 3748 3749static void 3750atomic_emit( 3751 const struct lp_build_tgsi_action * action, 3752 struct lp_build_tgsi_context * bld_base, 3753 struct lp_build_emit_data * emit_data) 3754{ 3755 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3756 struct gallivm_state * gallivm = bld_base->base.gallivm; 3757 LLVMBuilderRef builder = gallivm->builder; 3758 struct lp_build_context *uint_bld = &bld_base->uint_bld; 3759 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; 3760 3761 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY); 3762 unsigned buf = bufreg->Register.Index; 3763 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY; 3764 3765 LLVMAtomicRMWBinOp op = -1; 3766 switch (emit_data->inst->Instruction.Opcode) { 3767 case TGSI_OPCODE_ATOMUADD: 3768 op = LLVMAtomicRMWBinOpAdd; 3769 break; 3770 case TGSI_OPCODE_ATOMXCHG: 3771 op = LLVMAtomicRMWBinOpXchg; 3772 break; 3773 case TGSI_OPCODE_ATOMAND: 3774 op = LLVMAtomicRMWBinOpAnd; 3775 break; 3776 case TGSI_OPCODE_ATOMOR: 3777 op = LLVMAtomicRMWBinOpOr; 3778 break; 3779 case TGSI_OPCODE_ATOMXOR: 3780 op = LLVMAtomicRMWBinOpXor; 3781 break; 3782 case TGSI_OPCODE_ATOMUMIN: 3783 op = LLVMAtomicRMWBinOpUMin; 3784 break; 3785 case TGSI_OPCODE_ATOMUMAX: 3786 op = LLVMAtomicRMWBinOpUMax; 3787 break; 3788 case TGSI_OPCODE_ATOMIMIN: 3789 op = LLVMAtomicRMWBinOpMin; 3790 break; 3791 case TGSI_OPCODE_ATOMIMAX: 3792 op = LLVMAtomicRMWBinOpMax; 3793 break; 3794 case TGSI_OPCODE_ATOMCAS: 3795 break; 3796 default: 3797 assert(0); 3798 return; 3799 } 3800 3801 if (bufreg->Register.File == TGSI_FILE_IMAGE) { 3802 img_atomic_emit(action, bld_base, emit_data, op); 3803 } else if (0) { 3804 } else { 3805 LLVMValueRef index; /* index into the const buffer */ 3806 LLVMValueRef scalar, scalar_ptr; 3807 LLVMValueRef value; 3808 3809 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0); 3810 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0); 3811 3812 index = lp_build_shr_imm(uint_bld, index, 2); 3813 3814 if (!is_shared) { 3815 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan)); 3816 scalar_ptr = bld->ssbos[buf]; 3817 } else 3818 scalar_ptr = bld->shared_ptr; 3819 3820 LLVMValueRef atom_res = lp_build_alloca(gallivm, 3821 uint_bld->vec_type, ""); 3822 3823 LLVMValueRef ssbo_limit; 3824 if (!is_shared) { 3825 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), ""); 3826 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); 3827 } 3828 3829 LLVMValueRef exec_mask = mask_vec(bld_base); 3830 3831 if (!is_shared) { 3832 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit); 3833 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); 3834 } 3835 3836 struct lp_build_loop_state loop_state; 3837 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); 3838 3839 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value, 3840 loop_state.counter, ""); 3841 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, ""); 3842 3843 index = LLVMBuildExtractElement(gallivm->builder, index, 3844 loop_state.counter, ""); 3845 3846 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr, 3847 &index, 1, ""); 3848 3849 struct lp_build_if_state ifthen; 3850 LLVMValueRef cond, temp_res; 3851 3852 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); 3853 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); 3854 lp_build_if(&ifthen, gallivm, cond); 3855 3856 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 3857 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0); 3858 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src, 3859 loop_state.counter, ""); 3860 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, ""); 3861 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr, 3862 cas_src_ptr, 3863 LLVMAtomicOrderingSequentiallyConsistent, 3864 LLVMAtomicOrderingSequentiallyConsistent, 3865 false); 3866 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, ""); 3867 } else { 3868 scalar = LLVMBuildAtomicRMW(builder, op, 3869 scalar_ptr, value_ptr, 3870 LLVMAtomicOrderingSequentiallyConsistent, 3871 false); 3872 } 3873 temp_res = LLVMBuildLoad(builder, atom_res, ""); 3874 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); 3875 LLVMBuildStore(builder, temp_res, atom_res); 3876 lp_build_else(&ifthen); 3877 temp_res = LLVMBuildLoad(builder, atom_res, ""); 3878 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, ""); 3879 LLVMBuildStore(builder, temp_res, atom_res); 3880 lp_build_endif(&ifthen); 3881 3882 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), 3883 NULL, LLVMIntUGE); 3884 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, ""); 3885 } 3886} 3887 3888static void 3889barrier_emit( 3890 const struct lp_build_tgsi_action * action, 3891 struct lp_build_tgsi_context * bld_base, 3892 struct lp_build_emit_data * emit_data) 3893{ 3894 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 3895 struct gallivm_state * gallivm = bld_base->base.gallivm; 3896 3897 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume"); 3898 3899 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false); 3900 LLVMPositionBuilderAtEnd(gallivm->builder, resume); 3901} 3902 3903static void 3904membar_emit( 3905 const struct lp_build_tgsi_action * action, 3906 struct lp_build_tgsi_context * bld_base, 3907 struct lp_build_emit_data * emit_data) 3908{ 3909 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 3910 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, ""); 3911} 3912 3913static void 3914increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base, 3915 LLVMValueRef ptr, 3916 LLVMValueRef mask) 3917{ 3918 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 3919 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); 3920 3921 current_vec = LLVMBuildSub(builder, current_vec, mask, ""); 3922 3923 LLVMBuildStore(builder, current_vec, ptr); 3924} 3925 3926static void 3927clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base, 3928 LLVMValueRef ptr, 3929 LLVMValueRef mask) 3930{ 3931 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 3932 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); 3933 3934 current_vec = lp_build_select(&bld_base->uint_bld, 3935 mask, 3936 bld_base->uint_bld.zero, 3937 current_vec); 3938 3939 LLVMBuildStore(builder, current_vec, ptr); 3940} 3941 3942static LLVMValueRef 3943clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld, 3944 LLVMValueRef current_mask_vec, 3945 LLVMValueRef total_emitted_vertices_vec) 3946{ 3947 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 3948 struct lp_build_context *int_bld = &bld->bld_base.int_bld; 3949 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS, 3950 total_emitted_vertices_vec, 3951 bld->max_output_vertices_vec); 3952 3953 return LLVMBuildAnd(builder, current_mask_vec, max_mask, ""); 3954} 3955 3956static void 3957emit_vertex( 3958 const struct lp_build_tgsi_action * action, 3959 struct lp_build_tgsi_context * bld_base, 3960 struct lp_build_emit_data * emit_data) 3961{ 3962 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3963 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 3964 3965 if (bld->gs_iface->emit_vertex) { 3966 LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0], 3967 TGSI_TYPE_UNSIGNED, 3968 emit_data->inst->Src[0].Register.SwizzleX); 3969 LLVMValueRef mask = mask_vec(bld_base); 3970 LLVMValueRef total_emitted_vertices_vec = 3971 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); 3972 3973 mask = clamp_mask_to_max_output_vertices(bld, mask, 3974 total_emitted_vertices_vec); 3975 gather_outputs(bld); 3976 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base, 3977 bld->outputs, 3978 total_emitted_vertices_vec, 3979 mask, 3980 stream_id); 3981 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr, 3982 mask); 3983 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr, 3984 mask); 3985#if DUMP_GS_EMITS 3986 lp_build_print_value(bld->bld_base.base.gallivm, 3987 " +++ emit vertex masked ones = ", 3988 mask); 3989 lp_build_print_value(bld->bld_base.base.gallivm, 3990 " +++ emit vertex emitted = ", 3991 total_emitted_vertices_vec); 3992#endif 3993 } 3994} 3995 3996 3997static void 3998end_primitive_masked(struct lp_build_tgsi_context * bld_base, 3999 LLVMValueRef mask) 4000{ 4001 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4002 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 4003 4004 if (bld->gs_iface->end_primitive) { 4005 struct lp_build_context *uint_bld = &bld_base->uint_bld; 4006 LLVMValueRef emitted_vertices_vec = 4007 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); 4008 LLVMValueRef emitted_prims_vec = 4009 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); 4010 LLVMValueRef total_emitted_vertices_vec = 4011 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); 4012 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, 4013 emitted_vertices_vec, 4014 uint_bld->zero); 4015 /* We need to combine the current execution mask with the mask 4016 telling us which, if any, execution slots actually have 4017 unemitted primitives, this way we make sure that end_primitives 4018 executes only on the paths that have unflushed vertices */ 4019 mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); 4020 4021 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base, 4022 total_emitted_vertices_vec, 4023 emitted_vertices_vec, 4024 emitted_prims_vec, 4025 mask_vec(bld_base), 0); 4026 4027#if DUMP_GS_EMITS 4028 lp_build_print_value(bld->bld_base.base.gallivm, 4029 " +++ end prim masked ones = ", 4030 mask); 4031 lp_build_print_value(bld->bld_base.base.gallivm, 4032 " +++ end prim emitted verts1 = ", 4033 emitted_vertices_vec); 4034 lp_build_print_value(bld->bld_base.base.gallivm, 4035 " +++ end prim emitted prims1 = ", 4036 LLVMBuildLoad(builder, 4037 bld->emitted_prims_vec_ptr, "")); 4038#endif 4039 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr, 4040 mask); 4041 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr, 4042 mask); 4043#if DUMP_GS_EMITS 4044 lp_build_print_value(bld->bld_base.base.gallivm, 4045 " +++ end prim emitted verts2 = ", 4046 LLVMBuildLoad(builder, 4047 bld->emitted_vertices_vec_ptr, "")); 4048#endif 4049 } 4050 4051} 4052 4053static void 4054end_primitive( 4055 const struct lp_build_tgsi_action * action, 4056 struct lp_build_tgsi_context * bld_base, 4057 struct lp_build_emit_data * emit_data) 4058{ 4059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4060 4061 if (bld->gs_iface->end_primitive) { 4062 LLVMValueRef mask = mask_vec(bld_base); 4063 end_primitive_masked(bld_base, mask); 4064 } 4065} 4066 4067static void 4068barrier_emit_tcs( 4069 const struct lp_build_tgsi_action * action, 4070 struct lp_build_tgsi_context * bld_base, 4071 struct lp_build_emit_data * emit_data) 4072{ 4073 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4074 4075 if (bld->tcs_iface->emit_barrier) { 4076 bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base); 4077 } 4078} 4079 4080 4081static void 4082cal_emit( 4083 const struct lp_build_tgsi_action * action, 4084 struct lp_build_tgsi_context * bld_base, 4085 struct lp_build_emit_data * emit_data) 4086{ 4087 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4088 4089 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label, 4090 &bld_base->pc); 4091} 4092 4093static void 4094ret_emit( 4095 const struct lp_build_tgsi_action * action, 4096 struct lp_build_tgsi_context * bld_base, 4097 struct lp_build_emit_data * emit_data) 4098{ 4099 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4100 4101 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); 4102} 4103 4104static void 4105brk_emit( 4106 const struct lp_build_tgsi_action * action, 4107 struct lp_build_tgsi_context * bld_base, 4108 struct lp_build_emit_data * emit_data) 4109{ 4110 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4111 4112 lp_exec_tgsi_break(&bld->exec_mask, bld_base); 4113} 4114 4115static void 4116if_emit( 4117 const struct lp_build_tgsi_action * action, 4118 struct lp_build_tgsi_context * bld_base, 4119 struct lp_build_emit_data * emit_data) 4120{ 4121 LLVMValueRef tmp; 4122 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4123 4124 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL, 4125 emit_data->args[0], bld->bld_base.base.zero); 4126 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 4127} 4128 4129static void 4130uif_emit( 4131 const struct lp_build_tgsi_action * action, 4132 struct lp_build_tgsi_context * bld_base, 4133 struct lp_build_emit_data * emit_data) 4134{ 4135 LLVMValueRef tmp; 4136 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4137 struct lp_build_context *uint_bld = &bld_base->uint_bld; 4138 4139 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, 4140 emit_data->args[0], uint_bld->zero); 4141 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 4142} 4143 4144static void 4145case_emit( 4146 const struct lp_build_tgsi_action * action, 4147 struct lp_build_tgsi_context * bld_base, 4148 struct lp_build_emit_data * emit_data) 4149{ 4150 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4151 4152 lp_exec_case(&bld->exec_mask, emit_data->args[0]); 4153} 4154 4155static void 4156default_emit( 4157 const struct lp_build_tgsi_action * action, 4158 struct lp_build_tgsi_context * bld_base, 4159 struct lp_build_emit_data * emit_data) 4160{ 4161 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4162 4163 lp_exec_default(&bld->exec_mask, bld_base); 4164} 4165 4166static void 4167switch_emit( 4168 const struct lp_build_tgsi_action * action, 4169 struct lp_build_tgsi_context * bld_base, 4170 struct lp_build_emit_data * emit_data) 4171{ 4172 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4173 4174 lp_exec_switch(&bld->exec_mask, emit_data->args[0]); 4175} 4176 4177static void 4178endswitch_emit( 4179 const struct lp_build_tgsi_action * action, 4180 struct lp_build_tgsi_context * bld_base, 4181 struct lp_build_emit_data * emit_data) 4182{ 4183 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4184 4185 lp_exec_endswitch(&bld->exec_mask, bld_base); 4186} 4187 4188static void 4189bgnloop_emit( 4190 const struct lp_build_tgsi_action * action, 4191 struct lp_build_tgsi_context * bld_base, 4192 struct lp_build_emit_data * emit_data) 4193{ 4194 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4195 4196 lp_exec_bgnloop(&bld->exec_mask, true); 4197} 4198 4199static void 4200bgnsub_emit( 4201 const struct lp_build_tgsi_action * action, 4202 struct lp_build_tgsi_context * bld_base, 4203 struct lp_build_emit_data * emit_data) 4204{ 4205 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4206 4207 lp_exec_mask_bgnsub(&bld->exec_mask); 4208} 4209 4210static void 4211else_emit( 4212 const struct lp_build_tgsi_action * action, 4213 struct lp_build_tgsi_context * bld_base, 4214 struct lp_build_emit_data * emit_data) 4215{ 4216 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4217 4218 lp_exec_mask_cond_invert(&bld->exec_mask); 4219} 4220 4221static void 4222endif_emit( 4223 const struct lp_build_tgsi_action * action, 4224 struct lp_build_tgsi_context * bld_base, 4225 struct lp_build_emit_data * emit_data) 4226{ 4227 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4228 4229 lp_exec_mask_cond_pop(&bld->exec_mask); 4230} 4231 4232static void 4233endloop_emit( 4234 const struct lp_build_tgsi_action * action, 4235 struct lp_build_tgsi_context * bld_base, 4236 struct lp_build_emit_data * emit_data) 4237{ 4238 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4239 4240 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); 4241} 4242 4243static void 4244endsub_emit( 4245 const struct lp_build_tgsi_action * action, 4246 struct lp_build_tgsi_context * bld_base, 4247 struct lp_build_emit_data * emit_data) 4248{ 4249 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4250 4251 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); 4252} 4253 4254static void 4255cont_emit( 4256 const struct lp_build_tgsi_action * action, 4257 struct lp_build_tgsi_context * bld_base, 4258 struct lp_build_emit_data * emit_data) 4259{ 4260 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4261 4262 lp_exec_continue(&bld->exec_mask); 4263} 4264 4265static void emit_prologue(struct lp_build_tgsi_context * bld_base) 4266{ 4267 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4268 struct gallivm_state * gallivm = bld_base->base.gallivm; 4269 4270 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 4271 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4; 4272 bld->temps_array = lp_build_alloca_undef(gallivm, 4273 LLVMArrayType(bld_base->base.vec_type, array_size), 4274 "temp_array"); 4275 } 4276 4277 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 4278 LLVMValueRef array_size = 4279 lp_build_const_int32(gallivm, 4280 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); 4281 bld->outputs_array = lp_build_array_alloca(gallivm, 4282 bld_base->base.vec_type, array_size, 4283 "output_array"); 4284 } 4285 4286 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) { 4287 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4; 4288 bld->imms_array = lp_build_alloca_undef(gallivm, 4289 LLVMArrayType(bld_base->base.vec_type, array_size), 4290 "imms_array"); 4291 } 4292 4293 /* If we have indirect addressing in inputs we need to copy them into 4294 * our alloca array to be able to iterate over them */ 4295 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && 4296 !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) { 4297 unsigned index, chan; 4298 LLVMTypeRef vec_type = bld_base->base.vec_type; 4299 LLVMValueRef array_size = lp_build_const_int32(gallivm, 4300 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4); 4301 bld->inputs_array = lp_build_array_alloca(gallivm, 4302 vec_type, array_size, 4303 "input_array"); 4304 4305 assert(bld_base->info->num_inputs 4306 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1); 4307 4308 for (index = 0; index < bld_base->info->num_inputs; ++index) { 4309 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 4310 LLVMValueRef lindex = 4311 lp_build_const_int32(gallivm, index * 4 + chan); 4312 LLVMValueRef input_ptr = 4313 LLVMBuildGEP(gallivm->builder, bld->inputs_array, 4314 &lindex, 1, ""); 4315 LLVMValueRef value = bld->inputs[index][chan]; 4316 if (value) 4317 LLVMBuildStore(gallivm->builder, value, input_ptr); 4318 } 4319 } 4320 } 4321 4322 if (bld->gs_iface) { 4323 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 4324 bld->emitted_prims_vec_ptr = 4325 lp_build_alloca(gallivm, 4326 uint_bld->vec_type, 4327 "emitted_prims_ptr"); 4328 bld->emitted_vertices_vec_ptr = 4329 lp_build_alloca(gallivm, 4330 uint_bld->vec_type, 4331 "emitted_vertices_ptr"); 4332 bld->total_emitted_vertices_vec_ptr = 4333 lp_build_alloca(gallivm, 4334 uint_bld->vec_type, 4335 "total_emitted_vertices_ptr"); 4336 4337 LLVMBuildStore(gallivm->builder, uint_bld->zero, 4338 bld->emitted_prims_vec_ptr); 4339 LLVMBuildStore(gallivm->builder, uint_bld->zero, 4340 bld->emitted_vertices_vec_ptr); 4341 LLVMBuildStore(gallivm->builder, uint_bld->zero, 4342 bld->total_emitted_vertices_vec_ptr); 4343 } 4344 4345 if (DEBUG_EXECUTION) { 4346 lp_build_printf(gallivm, "\n"); 4347 emit_dump_file(bld, TGSI_FILE_CONSTANT); 4348 if (!bld->gs_iface) 4349 emit_dump_file(bld, TGSI_FILE_INPUT); 4350 } 4351} 4352 4353static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base) 4354{ 4355 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4356 4357 if (bld->tcs_iface && bld->tcs_iface->emit_prologue) { 4358 bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base); 4359 } 4360} 4361 4362static void emit_epilogue(struct lp_build_tgsi_context * bld_base) 4363{ 4364 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 4365 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 4366 4367 if (DEBUG_EXECUTION) { 4368 /* for debugging */ 4369 if (0) { 4370 emit_dump_file(bld, TGSI_FILE_TEMPORARY); 4371 } 4372 emit_dump_file(bld, TGSI_FILE_OUTPUT); 4373 lp_build_printf(bld_base->base.gallivm, "\n"); 4374 } 4375 4376 if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) { 4377 bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base); 4378 } 4379 4380 /* If we have indirect addressing in outputs we need to copy our alloca array 4381 * to the outputs slots specified by the caller */ 4382 if (bld->gs_iface) { 4383 LLVMValueRef total_emitted_vertices_vec; 4384 LLVMValueRef emitted_prims_vec; 4385 /* implicit end_primitives, needed in case there are any unflushed 4386 vertices in the cache. Note must not call end_primitive here 4387 since the exec_mask is not valid at this point. */ 4388 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask)); 4389 4390 total_emitted_vertices_vec = 4391 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); 4392 emitted_prims_vec = 4393 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); 4394 4395 bld->gs_iface->gs_epilogue(bld->gs_iface, 4396 total_emitted_vertices_vec, 4397 emitted_prims_vec, 0); 4398 } else { 4399 gather_outputs(bld); 4400 } 4401} 4402 4403void 4404lp_build_tgsi_soa(struct gallivm_state *gallivm, 4405 const struct tgsi_token *tokens, 4406 const struct lp_build_tgsi_params *params, 4407 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]) 4408{ 4409 struct lp_build_tgsi_soa_context bld; 4410 struct lp_type type = params->type; 4411 struct lp_type res_type; 4412 4413 assert(type.length <= LP_MAX_VECTOR_LENGTH); 4414 memset(&res_type, 0, sizeof res_type); 4415 res_type.width = type.width; 4416 res_type.length = type.length; 4417 res_type.sign = 1; 4418 4419 /* Setup build context */ 4420 memset(&bld, 0, sizeof bld); 4421 lp_build_context_init(&bld.bld_base.base, gallivm, type); 4422 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); 4423 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); 4424 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); 4425 { 4426 struct lp_type dbl_type; 4427 dbl_type = type; 4428 dbl_type.width *= 2; 4429 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type); 4430 } 4431 { 4432 struct lp_type uint64_type; 4433 uint64_type = lp_uint_type(type); 4434 uint64_type.width *= 2; 4435 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type); 4436 } 4437 { 4438 struct lp_type int64_type; 4439 int64_type = lp_int_type(type); 4440 int64_type.width *= 2; 4441 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type); 4442 } 4443 bld.mask = params->mask; 4444 bld.inputs = params->inputs; 4445 bld.outputs = outputs; 4446 bld.consts_ptr = params->consts_ptr; 4447 bld.const_sizes_ptr = params->const_sizes_ptr; 4448 bld.ssbo_ptr = params->ssbo_ptr; 4449 bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr; 4450 bld.sampler = params->sampler; 4451 bld.bld_base.info = params->info; 4452 bld.indirect_files = params->info->indirect_files; 4453 bld.context_ptr = params->context_ptr; 4454 bld.thread_data_ptr = params->thread_data_ptr; 4455 bld.image = params->image; 4456 bld.shared_ptr = params->shared_ptr; 4457 bld.coro = params->coro; 4458 4459 /* 4460 * If the number of temporaries is rather large then we just 4461 * allocate them as an array right from the start and treat 4462 * like indirect temporaries. 4463 */ 4464 if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) { 4465 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY); 4466 } 4467 /* 4468 * For performance reason immediates are always backed in a static 4469 * array, but if their number is too great, we have to use just 4470 * a dynamically allocated array. 4471 */ 4472 bld.use_immediates_array = 4473 (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES); 4474 if (bld.use_immediates_array) { 4475 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE); 4476 } 4477 4478 4479 bld.bld_base.soa = TRUE; 4480 bld.bld_base.emit_debug = emit_debug; 4481 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; 4482 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 4483 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 4484 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 4485 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value; 4486 4487 bld.bld_base.emit_store = emit_store; 4488 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output; 4489 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp; 4490 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address; 4491 4492 bld.bld_base.emit_declaration = lp_emit_declaration_soa; 4493 bld.bld_base.emit_immediate = lp_emit_immediate_soa; 4494 4495 bld.bld_base.emit_prologue = emit_prologue; 4496 bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl; 4497 bld.bld_base.emit_epilogue = emit_epilogue; 4498 4499 /* Set opcode actions */ 4500 lp_set_default_actions_cpu(&bld.bld_base); 4501 4502 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; 4503 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit; 4504 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit; 4505 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit; 4506 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit; 4507 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit; 4508 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit; 4509 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit; 4510 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit; 4511 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit; 4512 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; 4513 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; 4514 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit; 4515 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit; 4516 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit; 4517 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit; 4518 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit; 4519 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit; 4520 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit; 4521 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit; 4522 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit; 4523 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit; 4524 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit; 4525 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit; 4526 bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit; 4527 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit; 4528 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; 4529 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit; 4530 bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit; 4531 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit; 4532 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit; 4533 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit; 4534 bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit; 4535 bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit; 4536 /* DX10 sampling ops */ 4537 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit; 4538 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit; 4539 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit; 4540 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit; 4541 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit; 4542 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit; 4543 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit; 4544 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; 4545 bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit; 4546 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; 4547 bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit; 4548 4549 bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit; 4550 bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit; 4551 bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit; 4552 4553 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit; 4554 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit; 4555 bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit; 4556 bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit; 4557 bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit; 4558 bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit; 4559 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit; 4560 bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit; 4561 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit; 4562 bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit; 4563 4564 bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit; 4565 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit; 4566 4567 if (params->gs_iface) { 4568 /* There's no specific value for this because it should always 4569 * be set, but apps using ext_geometry_shader4 quite often 4570 * were forgetting so we're using MAX_VERTEX_VARYING from 4571 * that spec even though we could debug_assert if it's not 4572 * set, but that's a lot uglier. */ 4573 uint max_output_vertices; 4574 4575 /* inputs are always indirect with gs */ 4576 bld.indirect_files |= (1 << TGSI_FILE_INPUT); 4577 bld.gs_iface = params->gs_iface; 4578 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; 4579 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; 4580 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; 4581 4582 max_output_vertices = 4583 params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; 4584 if (!max_output_vertices) 4585 max_output_vertices = 32; 4586 4587 bld.max_output_vertices_vec = 4588 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type, 4589 max_output_vertices); 4590 } 4591 4592 if (params->tes_iface) { 4593 /* inputs are always indirect with tes */ 4594 bld.indirect_files |= (1 << TGSI_FILE_INPUT); 4595 bld.tes_iface = params->tes_iface; 4596 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input; 4597 } 4598 4599 if (params->tcs_iface) { 4600 bld.tcs_iface = params->tcs_iface; 4601 /* outputs and inputs are always indirect with tcs */ 4602 bld.indirect_files |= (1 << TGSI_FILE_OUTPUT); 4603 bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output; 4604 bld.indirect_files |= (1 << TGSI_FILE_INPUT); 4605 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input; 4606 bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input; 4607 bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs; 4608 } 4609 4610 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld); 4611 4612 bld.system_values = *params->system_values; 4613 4614 lp_build_tgsi_llvm(&bld.bld_base, tokens); 4615 4616 if (0) { 4617 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 4618 LLVMValueRef function = LLVMGetBasicBlockParent(block); 4619 debug_printf("11111111111111111111111111111 \n"); 4620 tgsi_dump(tokens, 0); 4621 lp_debug_dump_value(function); 4622 debug_printf("2222222222222222222222222222 \n"); 4623 } 4624 4625 if (0) { 4626 LLVMModuleRef module = LLVMGetGlobalParent( 4627 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 4628 LLVMDumpModule(module); 4629 4630 } 4631 lp_exec_mask_fini(&bld.exec_mask); 4632} 4633