// swr_shader.cpp revision 7ec681f3
1/**************************************************************************** 2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 ***************************************************************************/ 23 24#include <llvm/Config/llvm-config.h> 25 26#if LLVM_VERSION_MAJOR < 7 27// llvm redefines DEBUG 28#pragma push_macro("DEBUG") 29#undef DEBUG 30#endif 31 32#include "JitManager.h" 33#include "llvm-c/Core.h" 34#include "llvm/Support/CBindingWrapping.h" 35#include "llvm/IR/LegacyPassManager.h" 36 37#if LLVM_VERSION_MAJOR < 7 38#pragma pop_macro("DEBUG") 39#endif 40 41#include "state.h" 42#include "gen_state_llvm.h" 43#include "builder.h" 44#include "functionpasses/passes.h" 45 46#include "tgsi/tgsi_strings.h" 47#include "util/format/u_format.h" 48#include "util/u_prim.h" 49#include "gallivm/lp_bld_init.h" 50#include "gallivm/lp_bld_flow.h" 51#include "gallivm/lp_bld_struct.h" 52#include "gallivm/lp_bld_tgsi.h" 53#include "gallivm/lp_bld_const.h" 54#include "gallivm/lp_bld_printf.h" 55#include "gallivm/lp_bld_logic.h" 56 57#include "swr_context.h" 58#include "gen_surf_state_llvm.h" 59#include "gen_swr_context_llvm.h" 60#include "swr_resource.h" 61#include "swr_state.h" 62#include "swr_screen.h" 63 64 65///////////////////////////////////////////////////////////////////////// 66 67#include <stdio.h> 68#include <inttypes.h> 69 70#include "util/u_debug.h" 71#include "util/u_memory.h" 72#include "util/u_string.h" 73 74#include "gallivm/lp_bld_type.h" 75 76#if defined(DEBUG) && defined(SWR_VERBOSE_SHADER) 77constexpr bool verbose_shader = true; 78constexpr bool verbose_tcs_shader_in = true; 79constexpr bool verbose_tcs_shader_out = true; 80constexpr bool verbose_tcs_shader_loop = true; 81constexpr bool verbose_vs_shader = true; 82#else 83constexpr bool verbose_shader = false; 84constexpr bool verbose_tcs_shader_in = false; 85constexpr bool verbose_tcs_shader_out = false; 86constexpr bool verbose_tcs_shader_loop = false; 87constexpr bool verbose_vs_shader = false; 88#endif 89 90using namespace SwrJit; 91 92static unsigned 93locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info 
*info); 94 95bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs) 96{ 97 return !memcmp(&lhs, &rhs, sizeof(lhs)); 98} 99 100bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs) 101{ 102 return !memcmp(&lhs, &rhs, sizeof(lhs)); 103} 104 105bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs) 106{ 107 return !memcmp(&lhs, &rhs, sizeof(lhs)); 108} 109 110bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs) 111{ 112 return !memcmp(&lhs, &rhs, sizeof(lhs)); 113} 114 115bool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs) 116{ 117 return !memcmp(&lhs, &rhs, sizeof(lhs)); 118} 119 120bool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs) 121{ 122 return !memcmp(&lhs, &rhs, sizeof(lhs)); 123} 124 125 126static void 127swr_generate_sampler_key(const struct lp_tgsi_info &info, 128 struct swr_context *ctx, 129 enum pipe_shader_type shader_type, 130 struct swr_jit_sampler_key &key) 131{ 132 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1; 133 134 for (unsigned i = 0; i < key.nr_samplers; i++) { 135 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 136 lp_sampler_static_sampler_state( 137 &key.sampler[i].sampler_state, 138 ctx->samplers[shader_type][i]); 139 } 140 } 141 142 /* 143 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes 144 * are dx10-style? Can't really have mixed opcodes, at least not 145 * if we want to skip the holes here (without rescanning tgsi). 
146 */ 147 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 148 key.nr_sampler_views = 149 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 150 for (unsigned i = 0; i < key.nr_sampler_views; i++) { 151 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) { 152 const struct pipe_sampler_view *view = 153 ctx->sampler_views[shader_type][i]; 154 lp_sampler_static_texture_state( 155 &key.sampler[i].texture_state, view); 156 if (view) { 157 struct swr_resource *swr_res = swr_resource(view->texture); 158 const struct util_format_description *desc = 159 util_format_description(view->format); 160 if (swr_res->has_depth && swr_res->has_stencil && 161 !util_format_has_depth(desc)) 162 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 163 } 164 } 165 } 166 } else { 167 key.nr_sampler_views = key.nr_samplers; 168 for (unsigned i = 0; i < key.nr_sampler_views; i++) { 169 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 170 const struct pipe_sampler_view *view = 171 ctx->sampler_views[shader_type][i]; 172 lp_sampler_static_texture_state( 173 &key.sampler[i].texture_state, view); 174 if (view) { 175 struct swr_resource *swr_res = swr_resource(view->texture); 176 const struct util_format_description *desc = 177 util_format_description(view->format); 178 if (swr_res->has_depth && swr_res->has_stencil && 179 !util_format_has_depth(desc)) 180 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 181 } 182 } 183 } 184 } 185} 186 187void 188swr_generate_fs_key(struct swr_jit_fs_key &key, 189 struct swr_context *ctx, 190 swr_fragment_shader *swr_fs) 191{ 192 memset((void*)&key, 0, sizeof(key)); 193 194 key.nr_cbufs = ctx->framebuffer.nr_cbufs; 195 key.light_twoside = ctx->rasterizer->light_twoside; 196 key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable; 197 198 struct tgsi_shader_info *pPrevShader; 199 if (ctx->gs) 200 pPrevShader = &ctx->gs->info.base; 201 else if (ctx->tes) 202 pPrevShader = &ctx->tes->info.base; 203 else 204 
pPrevShader = &ctx->vs->info.base; 205 206 memcpy(&key.vs_output_semantic_name, 207 &pPrevShader->output_semantic_name, 208 sizeof(key.vs_output_semantic_name)); 209 memcpy(&key.vs_output_semantic_idx, 210 &pPrevShader->output_semantic_index, 211 sizeof(key.vs_output_semantic_idx)); 212 213 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key); 214 215 key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable && 216 ctx->poly_stipple.prim_is_poly; 217} 218 219void 220swr_generate_vs_key(struct swr_jit_vs_key &key, 221 struct swr_context *ctx, 222 swr_vertex_shader *swr_vs) 223{ 224 memset((void*)&key, 0, sizeof(key)); 225 226 key.clip_plane_mask = 227 swr_vs->info.base.clipdist_writemask ? 228 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 229 ctx->rasterizer->clip_plane_enable; 230 231 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key); 232} 233 234void 235swr_generate_fetch_key(struct swr_jit_fetch_key &key, 236 struct swr_vertex_element_state *velems) 237{ 238 memset((void*)&key, 0, sizeof(key)); 239 240 key.fsState = velems->fsState; 241} 242 243void 244swr_generate_gs_key(struct swr_jit_gs_key &key, 245 struct swr_context *ctx, 246 swr_geometry_shader *swr_gs) 247{ 248 memset((void*)&key, 0, sizeof(key)); 249 250 struct tgsi_shader_info *pPrevShader = nullptr; 251 252 if (ctx->tes) { 253 pPrevShader = &ctx->tes->info.base; 254 } else { 255 pPrevShader = &ctx->vs->info.base; 256 } 257 258 memcpy(&key.vs_output_semantic_name, 259 &pPrevShader->output_semantic_name, 260 sizeof(key.vs_output_semantic_name)); 261 memcpy(&key.vs_output_semantic_idx, 262 &pPrevShader->output_semantic_index, 263 sizeof(key.vs_output_semantic_idx)); 264 265 swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key); 266} 267 268void 269swr_generate_tcs_key(struct swr_jit_tcs_key &key, 270 struct swr_context *ctx, 271 swr_tess_control_shader *swr_tcs) 272{ 273 memset((void*)&key, 0, sizeof(key)); 274 275 
struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base; 276 277 memcpy(&key.vs_output_semantic_name, 278 &pPrevShader->output_semantic_name, 279 sizeof(key.vs_output_semantic_name)); 280 memcpy(&key.vs_output_semantic_idx, 281 &pPrevShader->output_semantic_index, 282 sizeof(key.vs_output_semantic_idx)); 283 284 key.clip_plane_mask = 285 swr_tcs->info.base.clipdist_writemask ? 286 swr_tcs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 287 ctx->rasterizer->clip_plane_enable; 288 289 swr_generate_sampler_key(swr_tcs->info, ctx, PIPE_SHADER_TESS_CTRL, key); 290} 291 292void 293swr_generate_tes_key(struct swr_jit_tes_key &key, 294 struct swr_context *ctx, 295 swr_tess_evaluation_shader *swr_tes) 296{ 297 memset((void*)&key, 0, sizeof(key)); 298 299 struct tgsi_shader_info *pPrevShader = nullptr; 300 301 if (ctx->tcs) { 302 pPrevShader = &ctx->tcs->info.base; 303 } 304 else { 305 pPrevShader = &ctx->vs->info.base; 306 } 307 308 SWR_ASSERT(pPrevShader != nullptr, "TES: No TCS or VS defined"); 309 310 memcpy(&key.prev_output_semantic_name, 311 &pPrevShader->output_semantic_name, 312 sizeof(key.prev_output_semantic_name)); 313 memcpy(&key.prev_output_semantic_idx, 314 &pPrevShader->output_semantic_index, 315 sizeof(key.prev_output_semantic_idx)); 316 317 key.clip_plane_mask = 318 swr_tes->info.base.clipdist_writemask ? 
319 swr_tes->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 320 ctx->rasterizer->clip_plane_enable; 321 322 swr_generate_sampler_key(swr_tes->info, ctx, PIPE_SHADER_TESS_EVAL, key); 323} 324 325struct BuilderSWR : public Builder { 326 BuilderSWR(JitManager *pJitMgr, const char *pName) 327 : Builder(pJitMgr) 328 { 329 pJitMgr->SetupNewModule(); 330 gallivm = gallivm_create(pName, wrap(&JM()->mContext), NULL); 331 pJitMgr->mpCurrentModule = unwrap(gallivm->module); 332 } 333 334 ~BuilderSWR() { 335 gallivm_free_ir(gallivm); 336 } 337 338 void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, 339 unsigned slot, unsigned channel); 340 341 struct gallivm_state *gallivm; 342 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key); 343 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key); 344 PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key); 345 PFN_TCS_FUNC CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key); 346 PFN_TES_FUNC CompileTES(struct swr_context *ctx, swr_jit_tes_key &key); 347 348 // GS-specific emit functions 349 LLVMValueRef 350 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface, 351 struct lp_build_context * bld, 352 boolean is_vindex_indirect, 353 LLVMValueRef vertex_index, 354 boolean is_aindex_indirect, 355 LLVMValueRef attrib_index, 356 LLVMValueRef swizzle_index); 357 void 358 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, 359 struct lp_build_context * bld, 360 LLVMValueRef (*outputs)[4], 361 LLVMValueRef emitted_vertices_vec, 362 LLVMValueRef stream_id); 363 364 void 365 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, 366 struct lp_build_context * bld, 367 LLVMValueRef total_emitted_vertices_vec_ptr, 368 LLVMValueRef verts_per_prim_vec, 369 LLVMValueRef emitted_prims_vec, 370 LLVMValueRef mask_vec); 371 372 void 373 swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base, 374 LLVMValueRef 
total_emitted_vertices_vec, 375 LLVMValueRef emitted_prims_vec, unsigned stream); 376 377 // TCS-specific emit functions 378 void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld); 379 void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld); 380 381 LLVMValueRef 382 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface, 383 struct lp_build_tgsi_context * bld_base, 384 boolean is_vindex_indirect, 385 LLVMValueRef vertex_index, 386 boolean is_aindex_indirect, 387 LLVMValueRef attrib_index, 388 LLVMValueRef swizzle_index); 389 390 LLVMValueRef 391 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface, 392 struct lp_build_tgsi_context * bld_base, 393 boolean is_vindex_indirect, 394 LLVMValueRef vertex_index, 395 boolean is_aindex_indirect, 396 LLVMValueRef attrib_index, 397 LLVMValueRef swizzle_index, 398 uint32_t name); 399 400 void 401 swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface, 402 struct lp_build_tgsi_context * bld_base, 403 unsigned name, 404 boolean is_vindex_indirect, 405 LLVMValueRef vertex_index, 406 boolean is_aindex_indirect, 407 LLVMValueRef attrib_index, 408 LLVMValueRef swizzle_index, 409 LLVMValueRef value, 410 LLVMValueRef mask_vec); 411 412 // Barrier implementation (available only in TCS) 413 void 414 swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface, 415 struct lp_build_tgsi_context *bld_base); 416 417 // TES-specific emit functions 418 LLVMValueRef 419 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface, 420 struct lp_build_tgsi_context * bld_base, 421 boolean is_vindex_indirect, 422 LLVMValueRef vertex_index, 423 boolean is_aindex_indirect, 424 LLVMValueRef attrib_index, 425 LLVMValueRef swizzle_index); 426 427 LLVMValueRef 428 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface, 429 struct lp_build_tgsi_context * bld_base, 430 boolean is_aindex_indirect, 431 LLVMValueRef attrib_index, 432 LLVMValueRef 
swizzle_index); 433}; 434 435struct swr_gs_llvm_iface { 436 struct lp_build_gs_iface base; 437 struct tgsi_shader_info *info; 438 439 BuilderSWR *pBuilder; 440 441 Value *pGsCtx; 442 SWR_GS_STATE *pGsState; 443 uint32_t num_outputs; 444 uint32_t num_verts_per_prim; 445 446 Value *pVtxAttribMap; 447}; 448 449struct swr_tcs_llvm_iface { 450 struct lp_build_tcs_iface base; 451 struct tgsi_shader_info *info; 452 453 BuilderSWR *pBuilder; 454 455 Value *pTcsCtx; 456 SWR_TS_STATE *pTsState; 457 458 uint32_t output_vertices; 459 460 LLVMValueRef loop_var; 461 462 Value *pVtxAttribMap; 463 Value *pVtxOutputAttribMap; 464 Value *pPatchOutputAttribMap; 465}; 466 467struct swr_tes_llvm_iface { 468 struct lp_build_tes_iface base; 469 struct tgsi_shader_info *info; 470 471 BuilderSWR *pBuilder; 472 473 Value *pTesCtx; 474 SWR_TS_STATE *pTsState; 475 476 uint32_t num_outputs; 477 478 Value *pVtxAttribMap; 479 Value *pPatchAttribMap; 480}; 481 482// trampoline functions so we can use the builder llvm construction methods 483static LLVMValueRef 484swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface, 485 struct lp_build_context * bld, 486 boolean is_vindex_indirect, 487 LLVMValueRef vertex_index, 488 boolean is_aindex_indirect, 489 LLVMValueRef attrib_index, 490 LLVMValueRef swizzle_index) 491{ 492 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; 493 494 return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld, 495 is_vindex_indirect, 496 vertex_index, 497 is_aindex_indirect, 498 attrib_index, 499 swizzle_index); 500} 501 502static void 503swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, 504 struct lp_build_context * bld, 505 LLVMValueRef (*outputs)[4], 506 LLVMValueRef emitted_vertices_vec, 507 LLVMValueRef mask_vec, 508 LLVMValueRef stream_id) 509{ 510 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 511 512 iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld, 513 outputs, 514 emitted_vertices_vec, 515 stream_id); 516} 517 
518static void 519swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, 520 struct lp_build_context * bld, 521 LLVMValueRef total_emitted_vertices_vec_ptr, 522 LLVMValueRef verts_per_prim_vec, 523 LLVMValueRef emitted_prims_vec, 524 LLVMValueRef mask_vec, unsigned stream_id) 525{ 526 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 527 528 iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld, 529 total_emitted_vertices_vec_ptr, 530 verts_per_prim_vec, 531 emitted_prims_vec, 532 mask_vec); 533} 534 535static void 536swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base, 537 LLVMValueRef total_emitted_vertices_vec, 538 LLVMValueRef emitted_prims_vec, unsigned stream) 539{ 540 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 541 542 iface->pBuilder->swr_gs_llvm_epilogue(gs_base, 543 total_emitted_vertices_vec, 544 emitted_prims_vec, stream); 545} 546 547static LLVMValueRef 548swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface, 549 struct lp_build_context * bld, 550 boolean is_vindex_indirect, 551 LLVMValueRef vertex_index, 552 boolean is_aindex_indirect, 553 LLVMValueRef attrib_index, 554 boolean is_sindex_indirect, 555 LLVMValueRef swizzle_index) 556{ 557 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 558 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 559 560 return iface->pBuilder->swr_tcs_llvm_fetch_input(tcs_iface, bld_base, 561 is_vindex_indirect, 562 vertex_index, 563 is_aindex_indirect, 564 attrib_index, 565 swizzle_index); 566} 567 568static LLVMValueRef 569swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface, 570 struct lp_build_context * bld, 571 boolean is_vindex_indirect, 572 LLVMValueRef vertex_index, 573 boolean is_aindex_indirect, 574 LLVMValueRef attrib_index, 575 boolean is_sindex_indirect, 576 LLVMValueRef swizzle_index, 577 uint32_t name) 578{ 579 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 580 struct lp_build_tgsi_context 
*bld_base = (struct lp_build_tgsi_context*)bld; 581 582 return iface->pBuilder->swr_tcs_llvm_fetch_output(tcs_iface, bld_base, 583 is_vindex_indirect, 584 vertex_index, 585 is_aindex_indirect, 586 attrib_index, 587 swizzle_index, 588 name); 589} 590 591 592static void 593swr_tcs_llvm_emit_prologue(struct lp_build_context* bld) 594{ 595 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld; 596 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface; 597 iface->pBuilder->swr_tcs_llvm_emit_prologue(bld_base); 598} 599 600static void 601swr_tcs_llvm_emit_epilogue(struct lp_build_context* bld) 602{ 603 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld; 604 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface; 605 iface->pBuilder->swr_tcs_llvm_emit_epilogue(bld_base); 606} 607 608static 609void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface, 610 struct lp_build_context * bld, 611 unsigned name, 612 boolean is_vindex_indirect, 613 LLVMValueRef vertex_index, 614 boolean is_aindex_indirect, 615 LLVMValueRef attrib_index, 616 boolean is_sindex_indirect, 617 LLVMValueRef swizzle_index, 618 LLVMValueRef value, 619 LLVMValueRef mask_vec) 620{ 621 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 622 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 623 624 iface->pBuilder->swr_tcs_llvm_store_output(tcs_iface, 625 bld_base, 626 name, 627 is_vindex_indirect, 628 vertex_index, 629 is_aindex_indirect, 630 attrib_index, 631 swizzle_index, 632 value, 633 mask_vec); 634} 635 636 637static 638void swr_tcs_llvm_emit_barrier(struct lp_build_context *bld) 639{ 640 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld; 641 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface; 642 643 iface->pBuilder->swr_tcs_llvm_emit_barrier(bld_base->tcs_iface, &bld_base->bld_base); 644} 645 646 647static LLVMValueRef 
648swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface, 649 struct lp_build_context * bld, 650 boolean is_vindex_indirect, 651 LLVMValueRef vertex_index, 652 boolean is_aindex_indirect, 653 LLVMValueRef attrib_index, 654 boolean is_sindex_indirect, 655 LLVMValueRef swizzle_index) 656{ 657 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 658 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 659 660 return iface->pBuilder->swr_tes_llvm_fetch_vtx_input(tes_iface, bld_base, 661 is_vindex_indirect, 662 vertex_index, 663 is_aindex_indirect, 664 attrib_index, 665 swizzle_index); 666} 667 668static LLVMValueRef 669swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface, 670 struct lp_build_context * bld, 671 boolean is_aindex_indirect, 672 LLVMValueRef attrib_index, 673 LLVMValueRef swizzle_index) 674{ 675 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 676 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 677 678 return iface->pBuilder->swr_tes_llvm_fetch_patch_input(tes_iface, bld_base, 679 is_aindex_indirect, 680 attrib_index, 681 swizzle_index); 682} 683 684LLVMValueRef 685BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface, 686 struct lp_build_context * bld, 687 boolean is_vindex_indirect, 688 LLVMValueRef vertex_index, 689 boolean is_aindex_indirect, 690 LLVMValueRef attrib_index, 691 LLVMValueRef swizzle_index) 692{ 693 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; 694 Value *vert_index = unwrap(vertex_index); 695 Value *attr_index = unwrap(attrib_index); 696 697 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 698 699 if (is_vindex_indirect || is_aindex_indirect) { 700 int i; 701 Value *res = unwrap(bld->zero); 702 struct lp_type type = bld->type; 703 704 for (i = 0; i < type.length; i++) { 705 Value *vert_chan_index = vert_index; 706 Value *attr_chan_index = attr_index; 707 708 if 
(is_vindex_indirect) { 709 vert_chan_index = VEXTRACT(vert_index, C(i)); 710 } 711 if (is_aindex_indirect) { 712 attr_chan_index = VEXTRACT(attr_index, C(i)); 713 } 714 715 Value *attrib = 716 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index})); 717 718 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts}); 719 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride}); 720 721 Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib); 722 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)})); 723 724 Value *value = VEXTRACT(pInput, C(i)); 725 res = VINSERT(res, value, C(i)); 726 } 727 728 return wrap(res); 729 } else { 730 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index})); 731 732 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts}); 733 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride}); 734 735 Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib); 736 737 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)})); 738 739 return wrap(pInput); 740 } 741} 742 743// GS output stream layout 744#define VERTEX_COUNT_SIZE 32 745#define CONTROL_HEADER_SIZE (8*32) 746 747void 748BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, 749 struct lp_build_context * bld, 750 LLVMValueRef (*outputs)[4], 751 LLVMValueRef emitted_vertices_vec, 752 LLVMValueRef stream_id) 753{ 754 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 755 756 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 757 const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE; 758 const uint32_t attribSize = 4 * sizeof(float); 759 const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS; 760 Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize)); 761 762 Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask}); 763 Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, mVWidth)); 764 
765 Value *pStack = STACKSAVE(); 766 Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking 767 768 for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) { 769 uint32_t attribSlot = attrib; 770 uint32_t sgvChannel = 0; 771 if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { 772 attribSlot = VERTEX_SGV_SLOT; 773 sgvChannel = VERTEX_SGV_POINT_SIZE_COMP; 774 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) { 775 attribSlot = VERTEX_SGV_SLOT; 776 sgvChannel = VERTEX_SGV_RTAI_COMP; 777 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) { 778 attribSlot = VERTEX_SGV_SLOT; 779 sgvChannel = VERTEX_SGV_VAI_COMP; 780 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { 781 attribSlot = VERTEX_POSITION_SLOT; 782 } else { 783 attribSlot = VERTEX_ATTRIB_START_SLOT + attrib; 784 if (iface->info->writes_position) { 785 attribSlot--; 786 } 787 } 788 789 Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ? 
790 791 for (uint32_t lane = 0; lane < mVWidth; ++lane) { 792 Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane)); 793 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 794 Value *pStreamOffset = GEP(pStream, pLaneOffset); 795 pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy); 796 797 Value *pLaneMask = VEXTRACT(vMask1, C(lane)); 798 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr); 799 800 for (uint32_t channel = 0; channel < 4; ++channel) { 801 Value *vData; 802 803 if (attribSlot == VERTEX_SGV_SLOT) 804 vData = LOAD(unwrap(outputs[attrib][0])); 805 else 806 vData = LOAD(unwrap(outputs[attrib][channel])); 807 808 if (attribSlot != VERTEX_SGV_SLOT || 809 sgvChannel == channel) { 810 vData = VEXTRACT(vData, C(lane)); 811 STORE(vData, pStreamOffset); 812 } 813 pStreamOffset = GEP(pStreamOffset, C(1)); 814 } 815 } 816 } 817 818 /* When the output type is not points, the geometry shader may not 819 * output data to multiple streams. So early exit here. 820 */ 821 if(iface->pGsState->outputTopology != TOP_POINT_LIST) { 822 STACKRESTORE(pStack); 823 return; 824 } 825 826 // Info about stream id for each vertex 827 // is coded in 2 bits (4 vert per byte "box"): 828 // ----------------- ----------------- ---- 829 // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |... 830 // ----------------- ----------------- ---- 831 832 // Calculate where need to put stream id for current vert 833 // in 1 byte "box". 834 Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2)); 835 836 // Calculate in which box put stream id for current vert. 
837 Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2)); 838 839 // Skip count header 840 Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE)); 841 842 for (uint32_t lane = 0; lane < mVWidth; ++lane) { 843 Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty); 844 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 845 846 Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane))); 847 848 // Just make sure that not overflow max - stream id = (0,1,2,3) 849 Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty); 850 851 // Shift it to correct position in byte "box" 852 vVal = SHL(vVal, pShift); 853 854 // Info about other vertices can be already stored 855 // so we need to read and add bits from current vert info. 856 Value *storedValue = LOAD(pStreamOffset); 857 vVal = OR(storedValue, vVal); 858 STORE(vVal, pStreamOffset); 859 } 860 861 STACKRESTORE(pStack); 862} 863 864void 865BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, 866 struct lp_build_context * bld, 867 LLVMValueRef total_emitted_vertices_vec, 868 LLVMValueRef verts_per_prim_vec, 869 LLVMValueRef emitted_prims_vec, 870 LLVMValueRef mask_vec) 871{ 872 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 873 874 /* When the output type is points, the geometry shader may output data 875 * to multiple streams, and end_primitive has no effect. Info about 876 * stream id for vertices is stored into the same place in memory where 877 * end primitive info is stored so early exit in this case. 
878 */ 879 if (iface->pGsState->outputTopology == TOP_POINT_LIST) { 880 return; 881 } 882 883 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 884 885 Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask }); 886 Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, 8)); 887 888 uint32_t vertsPerPrim = iface->num_verts_per_prim; 889 890 Value *vCount = 891 ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)), 892 unwrap(verts_per_prim_vec)); 893 894 vCount = unwrap(total_emitted_vertices_vec); 895 896 Value *mask = unwrap(mask_vec); 897 Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0))); 898 mask = AND(mask, cmpMask); 899 vMask1 = TRUNC(mask, getVectorType(mInt1Ty, 8)); 900 901 vCount = SUB(vCount, VIMMED1(1)); 902 Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE)); 903 Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8))); 904 905 vValue = TRUNC(vValue, getVectorType(mInt8Ty, 8)); 906 907 Value *pStack = STACKSAVE(); 908 Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking 909 910 for (uint32_t lane = 0; lane < mVWidth; ++lane) { 911 Value *vLaneOffset = VEXTRACT(vOffset, C(lane)); 912 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 913 Value *pStreamOffset = GEP(pStream, vLaneOffset); 914 915 Value *pLaneMask = VEXTRACT(vMask1, C(lane)); 916 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr); 917 918 Value *vVal = LOAD(pStreamOffset); 919 vVal = OR(vVal, VEXTRACT(vValue, C(lane))); 920 STORE(vVal, pStreamOffset); 921 } 922 923 STACKRESTORE(pStack); 924} 925 926void 927BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base, 928 LLVMValueRef total_emitted_vertices_vec, 929 LLVMValueRef emitted_prims_vec, unsigned stream) 930{ 931 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 932 933 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 934 935 // Store emit count to each output 
stream in the first DWORD 936 for (uint32_t lane = 0; lane < mVWidth; ++lane) 937 { 938 Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 939 pStream = BITCAST(pStream, mInt32PtrTy); 940 Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane)); 941 STORE(pLaneCount, pStream); 942 } 943} 944 945void 946BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld) 947{ 948 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface; 949 950 Value* loop_var = ALLOCA(mSimdInt32Ty); 951 STORE(VBROADCAST(C(0)), loop_var); 952 953 iface->loop_var = wrap(loop_var); 954 955 lp_exec_bgnloop(&bld->exec_mask, true); 956 957 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 958 bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var)))); 959 960 if (verbose_tcs_shader_loop) { 961 lp_build_print_value(gallivm, "Prologue LOOP Iteration BEGIN:", bld->system_values.invocation_id); 962 } 963 964} 965 966void 967BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld) 968{ 969 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface; 970 971 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 972 973 STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var)); 974 if (verbose_tcs_shader_loop) { 975 lp_build_print_value(gallivm, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface->loop_var)))); 976 } 977 978 LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))), 979 wrap(VBROADCAST(C(iface->output_vertices)))); 980 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 981 lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false); 982 lp_exec_mask_cond_pop(&bld->exec_mask); 983 lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask); 984} 985 986LLVMValueRef 987BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface, 988 struct lp_build_tgsi_context * bld_base, 989 boolean 
is_vindex_indirect, 990 LLVMValueRef vertex_index, 991 boolean is_aindex_indirect, 992 LLVMValueRef attrib_index, 993 LLVMValueRef swizzle_index) 994{ 995 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 996 997 Value *vert_index = unwrap(vertex_index); 998 Value *attr_index = unwrap(attrib_index); 999 1000 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1001 1002 if (verbose_tcs_shader_in) { 1003 lp_build_printf(gallivm, "[TCS IN][VTX] ======================================\n"); 1004 lp_build_print_value(gallivm, "[TCS IN][VTX] vertex_index: ", vertex_index); 1005 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index); 1006 lp_build_printf(gallivm, "[TCS IN][VTX] --------------------------------------\n"); 1007 } 1008 1009 Value *res = unwrap(bld_base->base.zero); 1010 if (is_vindex_indirect || is_aindex_indirect) { 1011 int i; 1012 struct lp_type type = bld_base->base.type; 1013 1014 for (i = 0; i < type.length; i++) { 1015 Value *vert_chan_index = vert_index; 1016 Value *attr_chan_index = attr_index; 1017 1018 if (is_vindex_indirect) { 1019 vert_chan_index = VEXTRACT(vert_index, C(i)); 1020 } 1021 if (is_aindex_indirect) { 1022 attr_chan_index = VEXTRACT(attr_index, C(i)); 1023 } 1024 1025 Value *attrib = 1026 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index})); 1027 1028 Value *pBase = GEP(iface->pTcsCtx, 1029 { C(0), C(SWR_HS_CONTEXT_vert), vert_chan_index, 1030 C(simdvertex_attrib), attrib, unwrap(swizzle_index), C(i) }); 1031 1032 Value *val = LOAD(pBase); 1033 1034 if (verbose_tcs_shader_in) { 1035 lp_build_print_value(gallivm, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index)); 1036 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index); 1037 lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index)); 1038 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib)); 1039 lp_build_print_value(gallivm, "[TCS IN][VTX] 
swizzle_index: ", swizzle_index); 1040 lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(val)); 1041 } 1042 res = VINSERT(res, val, C(i)); 1043 } 1044 } else { 1045 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index})); 1046 1047 Value *pBase = GEP(iface->pTcsCtx, 1048 { C(0), C(SWR_HS_CONTEXT_vert), vert_index, 1049 C(simdvertex_attrib), attrib, unwrap(swizzle_index) }); 1050 1051 res = LOAD(pBase); 1052 1053 if (verbose_tcs_shader_in) { 1054 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index); 1055 lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index)); 1056 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib)); 1057 lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index); 1058 lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(res)); 1059 } 1060 } 1061 if (verbose_tcs_shader_in) { 1062 lp_build_print_value(gallivm, "[TCS IN][VTX] returning: ", wrap(res)); 1063 } 1064 return wrap(res); 1065} 1066 1067LLVMValueRef 1068BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface, 1069 struct lp_build_tgsi_context * bld_base, 1070 boolean is_vindex_indirect, 1071 LLVMValueRef vertex_index, 1072 boolean is_aindex_indirect, 1073 LLVMValueRef attrib_index, 1074 LLVMValueRef swizzle_index, 1075 uint32_t name) 1076{ 1077 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 1078 1079 Value *vert_index = unwrap(vertex_index); 1080 Value *attr_index = unwrap(attrib_index); 1081 1082 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1083 1084 if (verbose_tcs_shader_in) { 1085 lp_build_print_value(gallivm, "[TCS INOUT] Vertex index: ", vertex_index); 1086 lp_build_print_value(gallivm, "[TCS INOUT] Attrib index: ", wrap(attr_index)); 1087 lp_build_print_value(gallivm, "[TCS INOUT] Swizzle index: ", swizzle_index); 1088 } 1089 1090 Value* res = unwrap(bld_base->base.zero); 1091 1092 for (uint32_t 
lane = 0; lane < mVWidth; lane++) { 1093 Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout}); 1094 Value* pCpOut = GEP(p1, {lane}); 1095 1096 Value *vert_chan_index = vert_index; 1097 Value *attr_chan_index = attr_index; 1098 1099 if (is_vindex_indirect) { 1100 vert_chan_index = VEXTRACT(vert_index, C(lane)); 1101 if (verbose_tcs_shader_in) { 1102 lp_build_print_value(gallivm, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index)); 1103 } 1104 } 1105 1106 if (is_aindex_indirect) { 1107 attr_chan_index = VEXTRACT(attr_index, C(lane)); 1108 if (verbose_tcs_shader_in) { 1109 lp_build_print_value(gallivm, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index)); 1110 } 1111 } 1112 1113 if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) { 1114 Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors}); 1115 Value* tessFactorArray = nullptr; 1116 if (name == TGSI_SEMANTIC_TESSOUTER) { 1117 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors}); 1118 } else { 1119 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors}); 1120 } 1121 Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)}); 1122 res = VINSERT(res, LOAD(tessFactor), C(lane)); 1123 if (verbose_tcs_shader_in) { 1124 lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane))); 1125 lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] loaded value: ", wrap(res)); 1126 } 1127 } else if (name == TGSI_SEMANTIC_PATCH) { 1128 Value* attr_index_from_map = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_chan_index})); 1129 Value* attr_value = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attr_index_from_map, unwrap(swizzle_index)}); 1130 res = VINSERT(res, LOAD(attr_value), C(lane)); 1131 if (verbose_tcs_shader_in) { 1132 lp_build_print_value(gallivm, "[TCS INOUT][PATCH] attr index loaded from map: ", 
wrap(attr_index_from_map)); 1133 lp_build_print_value(gallivm, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane))); 1134 lp_build_print_value(gallivm, "[TCS INOUT][PATCH] loaded value: ", wrap(res)); 1135 } 1136 } else { 1137 // Generic attribute 1138 Value *attrib = 1139 LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_chan_index})); 1140 if (verbose_tcs_shader_in) { 1141 lp_build_print_value(gallivm, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib)); 1142 } 1143 Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_chan_index, 1144 C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)}); 1145 1146 res = VINSERT(res, LOAD(attr_chan), C(lane)); 1147 if (verbose_tcs_shader_in) { 1148 lp_build_print_value(gallivm, "[TCS INOUT][VTX] loaded value: ", wrap(res)); 1149 } 1150 } 1151 } 1152 1153 return wrap(res); 1154} 1155 1156void 1157BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface, 1158 struct lp_build_tgsi_context *bld_base, 1159 unsigned name, 1160 boolean is_vindex_indirect, 1161 LLVMValueRef vertex_index, 1162 boolean is_aindex_indirect, 1163 LLVMValueRef attrib_index, 1164 LLVMValueRef swizzle_index, 1165 LLVMValueRef value, 1166 LLVMValueRef mask_vec) 1167{ 1168 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 1169 struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base; 1170 1171 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1172 1173 if (verbose_tcs_shader_out) { 1174 lp_build_printf(gallivm, "[TCS OUT] =============================================\n"); 1175 } 1176 1177 if (verbose_tcs_shader_out) { 1178 lp_build_print_value(gallivm, "[TCS OUT] Store mask: ", bld->exec_mask.exec_mask); 1179 lp_build_print_value(gallivm, "[TCS OUT] Store value: ", value); 1180 } 1181 1182 Value *vert_index = unwrap(vertex_index); 1183 Value *attr_index = unwrap(attrib_index); 1184 1185 if (verbose_tcs_shader_out) { 1186 lp_build_print_value(gallivm, "[TCS OUT] Vertex 
index: ", vertex_index); 1187 lp_build_print_value(gallivm, "[TCS OUT] Attrib index: ", wrap(attr_index)); 1188 lp_build_print_value(gallivm, "[TCS OUT] Swizzle index: ", swizzle_index); 1189 } 1190 1191 if (is_vindex_indirect) { 1192 vert_index = VEXTRACT(vert_index, C(0)); 1193 if (verbose_tcs_shader_out) { 1194 lp_build_print_value(gallivm, "[TCS OUT] Extracted vertex index: ", vertex_index); 1195 } 1196 } 1197 1198 if (is_aindex_indirect) { 1199 attr_index = VEXTRACT(attr_index, C(0)); 1200 if (verbose_tcs_shader_out) { 1201 lp_build_print_value(gallivm, "[TCS OUT] Extracted attrib index: ", wrap(attr_index)); 1202 } 1203 } 1204 1205 if (verbose_tcs_shader_out) { 1206 if (bld->exec_mask.has_mask) { 1207 lp_build_print_value(gallivm, "[TCS OUT] Exec mask: ", bld->exec_mask.exec_mask); 1208 } 1209 else { 1210 lp_build_printf(gallivm, "[TCS OUT] has no mask\n"); 1211 } 1212 } 1213 for (uint32_t lane = 0; lane < mVWidth; lane++) { 1214 Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout}); 1215 Value* pCpOut = GEP(p1, {lane}); 1216 1217 if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) { 1218 Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors}); 1219 Value* tessFactorArray = nullptr; 1220 if (name == TGSI_SEMANTIC_TESSOUTER) { 1221 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors}); 1222 } else { 1223 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors}); 1224 } 1225 Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)}); 1226 Value* valueToStore = VEXTRACT(unwrap(value), C(lane)); 1227 valueToStore = BITCAST(valueToStore, mFP32Ty); 1228 if (mask_vec) { 1229 Value *originalVal = LOAD(tessFactor); 1230 Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty); 1231 valueToStore = SELECT(vMask, valueToStore, originalVal); 1232 } 1233 STORE(valueToStore, tessFactor); 1234 if (verbose_tcs_shader_out) 1235 { 1236 
lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec); 1237 lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore)); 1238 } 1239 } else if (name == TGSI_SEMANTIC_PATCH) { 1240 Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index})); 1241 if (verbose_tcs_shader_out) { 1242 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index)); 1243 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index)); 1244 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect))); 1245 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect))); 1246 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib)); 1247 } 1248 Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib}); 1249 Value* value_to_store = VEXTRACT(unwrap(value), C(lane)); 1250 if (verbose_tcs_shader_out) { 1251 lp_build_print_value(gallivm, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane))); 1252 lp_build_print_value(gallivm, "[TCS OUT][PATCH] value to store: ", value); 1253 lp_build_print_value(gallivm, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store)); 1254 lp_build_print_value(gallivm, "[TCS OUT][PATCH] chan_index: ", swizzle_index); 1255 } 1256 value_to_store = BITCAST(value_to_store, mFP32Ty); 1257 if (mask_vec) { 1258 Value *originalVal = LOADV(attr, {C(0), unwrap(swizzle_index)}); 1259 Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty); 1260 value_to_store = SELECT(vMask, value_to_store, originalVal); 1261 if (verbose_tcs_shader_out) { 1262 lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", mask_vec); 1263 lp_build_print_value(gallivm, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal)); 1264 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vMask: ", wrap(vMask)); 1265 
lp_build_print_value(gallivm, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store)); 1266 } 1267 } 1268 STOREV(value_to_store, attr, {C(0), unwrap(swizzle_index)}); 1269 if (verbose_tcs_shader_out) { 1270 lp_build_print_value(gallivm, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store)); 1271 } 1272 } else { 1273 Value* value_to_store = VEXTRACT(unwrap(value), C(lane)); 1274 Value* attrib = LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index})); 1275 1276 if (verbose_tcs_shader_out) { 1277 lp_build_printf(gallivm, "[TCS OUT] Writting attribute\n"); 1278 lp_build_print_value(gallivm, "[TCS OUT][VTX] invocation_id: ", bld->system_values.invocation_id); 1279 lp_build_print_value(gallivm, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index)); 1280 lp_build_print_value(gallivm, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib)); 1281 lp_build_print_value(gallivm, "[TCS OUT][VTX] chan_index: ", swizzle_index); 1282 lp_build_print_value(gallivm, "[TCS OUT][VTX] value: ", value); 1283 lp_build_print_value(gallivm, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store)); 1284 } 1285 1286 Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), 1287 VEXTRACT(unwrap(bld->system_values.invocation_id), C(0)), 1288 C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)}); 1289 1290 // Mask output values if needed 1291 value_to_store = BITCAST(value_to_store, mFP32Ty); 1292 if (mask_vec) { 1293 Value *originalVal = LOAD(attr_chan); 1294 Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty); 1295 value_to_store = SELECT(vMask, value_to_store, originalVal); 1296 } 1297 STORE(value_to_store, attr_chan); 1298 if (verbose_tcs_shader_out) { 1299 lp_build_print_value(gallivm, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec); 1300 lp_build_print_value(gallivm, "[TCS OUT][VTX] stored: ", wrap(value_to_store)); 1301 } 1302 } 1303 } 1304} 1305 1306void 1307BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface, 1308 struct 
lp_build_tgsi_context *bld_base) 1309{ 1310 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 1311 struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base; 1312 1313 if (verbose_tcs_shader_loop) { 1314 lp_build_print_value(gallivm, "Barrier LOOP: Iteration %d END\n", iface->loop_var); 1315 } 1316 1317 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 1318 1319 STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var)); 1320 1321 LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))), 1322 wrap(VBROADCAST(C(iface->output_vertices)))); 1323 1324 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 1325 lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false); 1326 lp_exec_mask_cond_pop(&bld->exec_mask); 1327 lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask); 1328 1329 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1330 1331 STORE(VBROADCAST(C(0)), unwrap(iface->loop_var)); 1332 lp_exec_bgnloop(&bld->exec_mask, true); 1333 1334 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1335 1336 bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var)))); 1337 1338 if (verbose_tcs_shader_loop) { 1339 lp_build_print_value(gallivm, "Barrier LOOP: Iteration BEGIN: ", iface->loop_var); 1340 lp_build_print_value(gallivm, "Barrier LOOP: InvocationId: \n", bld->system_values.invocation_id); 1341 } 1342} 1343 1344 1345LLVMValueRef 1346BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface, 1347 struct lp_build_tgsi_context * bld_base, 1348 boolean is_aindex_indirect, 1349 LLVMValueRef attrib_index, 1350 LLVMValueRef swizzle_index) 1351{ 1352 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 1353 Value *attr_index = unwrap(attrib_index); 1354 Value *res = unwrap(bld_base->base.zero); 1355 1356 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1357 1358 if 
(verbose_shader) { 1359 lp_build_printf(gallivm, "[TES IN][PATCH] --------------------------------------\n"); 1360 } 1361 1362 if (is_aindex_indirect) { 1363 int i; 1364 struct lp_type type = bld_base->base.type; 1365 1366 for (i = 0; i < type.length; i++) { 1367 Value *attr_chan_index = attr_index; 1368 1369 if (is_aindex_indirect) { 1370 attr_chan_index = VEXTRACT(attr_index, C(i)); 1371 } 1372 1373 Value *attrib = 1374 LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_chan_index})); 1375 1376 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 1377 Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData}); 1378 Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib}); 1379 Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)}); 1380 if (verbose_shader) { 1381 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index); 1382 lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index)); 1383 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib)); 1384 lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index); 1385 lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val)); 1386 } 1387 res = VINSERT(res, Val, C(i)); 1388 } 1389 } else { 1390 Value *attrib = LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_index})); 1391 1392 Value *pCpIn = LOAD(iface->pTesCtx, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 1393 Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData}); 1394 Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib}); 1395 Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)}); 1396 if (verbose_shader) { 1397 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index); 1398 lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index)); 1399 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib)); 1400 
lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index); 1401 lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val)); 1402 } 1403 res = VBROADCAST(Val); 1404 } 1405 if (verbose_shader) { 1406 lp_build_print_value(gallivm, "[TES IN][PATCH] returning: ", wrap(res)); 1407 } 1408 return wrap(res); 1409} 1410 1411 1412 1413LLVMValueRef 1414BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface, 1415 struct lp_build_tgsi_context * bld_base, 1416 boolean is_vindex_indirect, 1417 LLVMValueRef vertex_index, 1418 boolean is_aindex_indirect, 1419 LLVMValueRef attrib_index, 1420 LLVMValueRef swizzle_index) 1421{ 1422 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 1423 Value *vert_index = unwrap(vertex_index); 1424 Value *attr_index = unwrap(attrib_index); 1425 1426 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1427 1428 if (verbose_shader) { 1429 lp_build_printf(gallivm, "[TES IN][VTX] --------------------------------------\n"); 1430 } 1431 1432 Value *res = unwrap(bld_base->base.zero); 1433 if (is_vindex_indirect || is_aindex_indirect) { 1434 int i; 1435 struct lp_type type = bld_base->base.type; 1436 1437 for (i = 0; i < type.length; i++) { 1438 Value *vert_chan_index = vert_index; 1439 Value *attr_chan_index = attr_index; 1440 1441 if (is_vindex_indirect) { 1442 vert_chan_index = VEXTRACT(vert_index, C(i)); 1443 } 1444 if (is_aindex_indirect) { 1445 attr_chan_index = VEXTRACT(attr_index, C(i)); 1446 } 1447 1448 Value *attrib = 1449 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index})); 1450 1451 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 1452 Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp}); 1453 Value *pVertex = GEP(pCp, {(Value*)C(0), vert_chan_index}); 1454 Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)}); 1455 Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib}); 1456 Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)}); 1457 
if (verbose_shader) { 1458 lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index); 1459 lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index)); 1460 lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib)); 1461 lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index); 1462 lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val)); 1463 } 1464 res = VINSERT(res, Val, C(i)); 1465 } 1466 } else { 1467 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index})); 1468 1469 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 1470 Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp}); 1471 Value *pVertex = GEP(pCp, {(Value*)C(0), vert_index}); 1472 Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)}); 1473 Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib}); 1474 Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)}); 1475 if (verbose_shader) { 1476 lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index); 1477 lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index)); 1478 lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib)); 1479 lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index); 1480 lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val)); 1481 } 1482 res = VBROADCAST(Val); 1483 } 1484 if (verbose_shader) { 1485 lp_build_print_value(gallivm, "[TES IN][VTX] returning: ", wrap(res)); 1486 } 1487 return wrap(res); 1488} 1489 1490 1491 1492 1493PFN_GS_FUNC 1494BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) 1495{ 1496 SWR_GS_STATE *pGS = &ctx->gs->gsState; 1497 struct tgsi_shader_info *info = &ctx->gs->info.base; 1498 1499 memset(pGS, 0, sizeof(*pGS)); 1500 1501 pGS->gsEnable = true; 1502 1503 pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs; 1504 
pGS->outputTopology = 1505 swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM], 0); 1506 1507 /* It's +1 because emit_vertex in swr is always called exactly one time more 1508 * than max_vertices passed in Geometry Shader. We need to allocate more memory 1509 * to avoid crash/memory overwritten. 1510 */ 1511 pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1; 1512 pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS]; 1513 1514 // If point primitive then assume to use multiple streams 1515 if(pGS->outputTopology == TOP_POINT_LIST) { 1516 pGS->isSingleStream = false; 1517 } else { 1518 pGS->isSingleStream = true; 1519 pGS->singleStreamID = 0; 1520 } 1521 1522 pGS->vertexAttribOffset = VERTEX_POSITION_SLOT; 1523 pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset; 1524 pGS->outputVertexSize = SWR_VTX_NUM_SLOTS; 1525 pGS->controlDataSize = 8; // GS outputs max of 8 32B units 1526 pGS->controlDataOffset = VERTEX_COUNT_SIZE; 1527 pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE; 1528 1529 pGS->allocationSize = 1530 VERTEX_COUNT_SIZE + // vertex count 1531 CONTROL_HEADER_SIZE + // control header 1532 (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex 1533 pGS->maxNumVerts; // num verts 1534 1535 struct swr_geometry_shader *gs = ctx->gs; 1536 1537 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 1538 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 1539 1540 memset(outputs, 0, sizeof(outputs)); 1541 1542 AttrBuilder attrBuilder; 1543 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 1544 1545 std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 1546 PointerType::get(mInt8Ty, 0), 1547 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)}; 1548 FunctionType *vsFuncType = 1549 FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false); 1550 1551 // create new vertex shader function 1552 auto pFunction = 
Function::Create(vsFuncType, 1553 GlobalValue::ExternalLinkage, 1554 "GS", 1555 JM()->mpCurrentModule); 1556#if LLVM_VERSION_MAJOR < 5 1557 AttributeSet attrSet = AttributeSet::get( 1558 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 1559 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 1560#else 1561 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 1562#endif 1563 1564 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 1565 IRB()->SetInsertPoint(block); 1566 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 1567 1568 auto argitr = pFunction->arg_begin(); 1569 Value *hPrivateData = &*argitr++; 1570 hPrivateData->setName("hPrivateData"); 1571 Value *pWorkerData = &*argitr++; 1572 pWorkerData->setName("pWorkerData"); 1573 Value *pGsCtx = &*argitr++; 1574 pGsCtx->setName("gsCtx"); 1575 1576 Value *consts_ptr = 1577 GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)}); 1578 consts_ptr->setName("gs_constants"); 1579 Value *const_sizes_ptr = 1580 GEP(hPrivateData, {0, swr_draw_context_num_constantsGS}); 1581 const_sizes_ptr->setName("num_gs_constants"); 1582 1583 struct lp_build_sampler_soa *sampler = 1584 swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY); 1585 assert(sampler != nullptr); 1586 1587 struct lp_bld_tgsi_system_values system_values; 1588 memset(&system_values, 0, sizeof(system_values)); 1589 system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID})); 1590 system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID})); 1591 1592 std::vector<Constant*> mapConstants; 1593 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 1594 for (unsigned slot = 0; slot < info->num_inputs; slot++) { 1595 ubyte semantic_name = info->input_semantic_name[slot]; 1596 ubyte semantic_idx = info->input_semantic_index[slot]; 1597 1598 unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); 1599 
assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS); 1600 1601 vs_slot += VERTEX_ATTRIB_START_SLOT; 1602 1603 if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) 1604 vs_slot--; 1605 1606 if (semantic_name == TGSI_SEMANTIC_POSITION) 1607 vs_slot = VERTEX_POSITION_SLOT; 1608 1609 STORE(C(vs_slot), vtxAttribMap, {0, slot}); 1610 mapConstants.push_back(C(vs_slot)); 1611 } 1612 1613 struct lp_build_mask_context mask; 1614 Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask"); 1615 lp_build_mask_begin(&mask, gallivm, 1616 lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 1617 1618 // zero out cut buffer so we can load/modify/store bits 1619 for (uint32_t lane = 0; lane < mVWidth; ++lane) 1620 { 1621 Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 1622#if LLVM_VERSION_MAJOR >= 10 1623 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH)); 1624#else 1625 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH); 1626#endif 1627 } 1628 1629 struct swr_gs_llvm_iface gs_iface; 1630 gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input; 1631 gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex; 1632 gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive; 1633 gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue; 1634 gs_iface.pBuilder = this; 1635 gs_iface.pGsCtx = pGsCtx; 1636 gs_iface.pGsState = pGS; 1637 gs_iface.num_outputs = gs->info.base.num_outputs; 1638 gs_iface.num_verts_per_prim = 1639 u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]); 1640 gs_iface.info = info; 1641 gs_iface.pVtxAttribMap = vtxAttribMap; 1642 1643 struct lp_build_tgsi_params params; 1644 memset(¶ms, 0, sizeof(params)); 1645 params.type = lp_type_float_vec(32, 32 * 8); 1646 params.mask = & mask; 1647 params.consts_ptr = wrap(consts_ptr); 1648 params.const_sizes_ptr = wrap(const_sizes_ptr); 1649 params.system_values = 
&system_values; 1650 params.inputs = inputs; 1651 params.context_ptr = wrap(hPrivateData); 1652 params.sampler = sampler; 1653 params.info = &gs->info.base; 1654 params.gs_iface = &gs_iface.base; 1655 1656 lp_build_tgsi_soa(gallivm, 1657 gs->pipe.tokens, 1658 ¶ms, 1659 outputs); 1660 1661 lp_build_mask_end(&mask); 1662 1663 sampler->destroy(sampler); 1664 1665 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1666 1667 RET_VOID(); 1668 1669 gallivm_verify_function(gallivm, wrap(pFunction)); 1670 gallivm_compile_module(gallivm); 1671 1672 PFN_GS_FUNC pFunc = 1673 (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 1674 1675 debug_printf("geom shader %p\n", pFunc); 1676 assert(pFunc && "Error: GeomShader = NULL"); 1677 1678 JM()->mIsModuleFinalized = true; 1679 1680 return pFunc; 1681} 1682 1683PFN_TES_FUNC 1684BuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key) 1685{ 1686 SWR_TS_STATE *pTS = &ctx->tsState; 1687 struct tgsi_shader_info *info = &ctx->tes->info.base; 1688 1689 // tessellation is enabled if TES is present 1690 // clear tessellation state here then 1691 memset(pTS, 0, sizeof(*pTS)); 1692 1693 pTS->tsEnable = true; 1694 1695 unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE]; 1696 unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; 1697 bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; 1698 bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE]; 1699 SWR_TS_DOMAIN type = SWR_TS_ISOLINE; 1700 SWR_TS_PARTITIONING partitioning = SWR_TS_EVEN_FRACTIONAL; 1701 SWR_TS_OUTPUT_TOPOLOGY topology = SWR_TS_OUTPUT_POINT; 1702 PRIMITIVE_TOPOLOGY postDSTopology = TOP_POINT_LIST; 1703 1704 // TESS_TODO: move this to helper functions to improve readability 1705 switch (tes_prim_mode) { 1706 case PIPE_PRIM_LINES: 1707 type = SWR_TS_ISOLINE; 1708 postDSTopology = TOP_LINE_LIST; 1709 break; 1710 case PIPE_PRIM_TRIANGLES: 1711 type = SWR_TS_TRI; 1712 
postDSTopology = TOP_TRIANGLE_LIST; 1713 break; 1714 case PIPE_PRIM_QUADS: 1715 type = SWR_TS_QUAD; 1716 // See OpenGL spec - quads are tessellated into triangles 1717 postDSTopology = TOP_TRIANGLE_LIST; 1718 break; 1719 default: 1720 assert(0); 1721 } 1722 1723 switch (tes_spacing) { 1724 case PIPE_TESS_SPACING_FRACTIONAL_ODD: 1725 partitioning = SWR_TS_ODD_FRACTIONAL; 1726 break; 1727 case PIPE_TESS_SPACING_FRACTIONAL_EVEN: 1728 partitioning = SWR_TS_EVEN_FRACTIONAL; 1729 break; 1730 case PIPE_TESS_SPACING_EQUAL: 1731 partitioning = SWR_TS_INTEGER; 1732 break; 1733 default: 1734 assert(0); 1735 } 1736 1737 if (tes_point_mode) { 1738 topology = SWR_TS_OUTPUT_POINT; 1739 postDSTopology = TOP_POINT_LIST; 1740 } 1741 else if (tes_prim_mode == PIPE_PRIM_LINES) { 1742 topology = SWR_TS_OUTPUT_LINE; 1743 } 1744 else if (tes_vertex_order_cw) { 1745 topology = SWR_TS_OUTPUT_TRI_CW; 1746 } 1747 else { 1748 topology = SWR_TS_OUTPUT_TRI_CCW; 1749 } 1750 1751 pTS->domain = type; 1752 pTS->tsOutputTopology = topology; 1753 pTS->partitioning = partitioning; 1754 pTS->numDsOutputAttribs = info->num_outputs; 1755 pTS->postDSTopology = postDSTopology; 1756 1757 pTS->dsAllocationSize = SWR_VTX_NUM_SLOTS * MAX_NUM_VERTS_PER_PRIM; 1758 pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 1759 pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 1760 pTS->dsOutVtxAttribOffset = VERTEX_ATTRIB_START_SLOT; 1761 1762 struct swr_tess_evaluation_shader *tes = ctx->tes; 1763 1764 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 1765 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 1766 1767 memset(outputs, 0, sizeof(outputs)); 1768 1769 AttrBuilder attrBuilder; 1770 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 1771 1772 std::vector<Type *> tesArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 1773 PointerType::get(mInt8Ty, 0), 1774 PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)}; 1775 FunctionType *tesFuncType = 1776 
FunctionType::get(Type::getVoidTy(JM()->mContext), tesArgs, false); 1777 1778 // create new vertex shader function 1779 auto pFunction = Function::Create(tesFuncType, 1780 GlobalValue::ExternalLinkage, 1781 "TES", 1782 JM()->mpCurrentModule); 1783 1784#if LLVM_VERSION_MAJOR < 5 1785 AttributeSet attrSet = AttributeSet::get( 1786 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 1787 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 1788#else 1789 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 1790#endif 1791 1792 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 1793 IRB()->SetInsertPoint(block); 1794 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 1795 1796 auto argitr = pFunction->arg_begin(); 1797 Value *hPrivateData = &*argitr++; 1798 hPrivateData->setName("hPrivateData"); 1799 Value *pWorkerData = &*argitr++; 1800 pWorkerData->setName("pWorkerData"); 1801 Value *pTesCtx = &*argitr++; 1802 pTesCtx->setName("tesCtx"); 1803 1804 Value *consts_ptr = 1805 GEP(hPrivateData, {C(0), C(swr_draw_context_constantTES)}); 1806 consts_ptr->setName("tes_constants"); 1807 Value *const_sizes_ptr = 1808 GEP(hPrivateData, {0, swr_draw_context_num_constantsTES}); 1809 const_sizes_ptr->setName("num_tes_constants"); 1810 1811 struct lp_build_sampler_soa *sampler = 1812 swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_EVAL); 1813 assert(sampler != nullptr); 1814 1815 struct lp_bld_tgsi_system_values system_values; 1816 memset(&system_values, 0, sizeof(system_values)); 1817 1818 // Load and calculate system values 1819 // Tessellation coordinates (gl_TessCoord) 1820 Value *vecOffset = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}, "vecOffset"); 1821 Value *vecStride = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorStride}, "vecStride"); 1822 Value *vecIndex = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}); 1823 1824 Value* tess_coord = ALLOCA(ArrayType::get(mSimdFP32Ty, 3)); 1825 1826 Value *tessCoordU = 
LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainU}), {vecIndex}, "tessCoordU"); 1827 STORE(tessCoordU, tess_coord, {0, 0}); 1828 Value *tessCoordV = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainV}), {vecIndex}, "tessCoordV"); 1829 STORE(tessCoordV, tess_coord, {0, 1}); 1830 Value *tessCoordW = FSUB(FSUB(VIMMED1(1.0f), tessCoordU), tessCoordV, "tessCoordW"); 1831 STORE(tessCoordW, tess_coord, {0, 2}); 1832 system_values.tess_coord = wrap(tess_coord); 1833 1834 // Primitive ID 1835 system_values.prim_id = wrap(VBROADCAST(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_PrimitiveID}), "PrimitiveID")); 1836 1837 // Tessellation factors 1838 Value* pPatch = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}); 1839 Value* pTessFactors = GEP(pPatch, {C(0), C(ScalarPatch_tessFactors)}); 1840 1841 assert(SWR_NUM_OUTER_TESS_FACTORS == 4); 1842 Value* sys_value_outer_factors = UndefValue::get(getVectorType(mFP32Ty, 4)); 1843 for (unsigned i = 0; i < SWR_NUM_OUTER_TESS_FACTORS; i++) { 1844 Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors, i}); 1845 sys_value_outer_factors = VINSERT(sys_value_outer_factors, v, i, "gl_TessLevelOuter"); 1846 } 1847 system_values.tess_outer = wrap(sys_value_outer_factors); 1848 1849 assert(SWR_NUM_INNER_TESS_FACTORS == 2); 1850 Value* sys_value_inner_factors = UndefValue::get(getVectorType(mFP32Ty, 4)); 1851 for (unsigned i = 0; i < SWR_NUM_INNER_TESS_FACTORS; i++) { 1852 Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors, i}); 1853 sys_value_inner_factors = VINSERT(sys_value_inner_factors, v, i, "gl_TessLevelInner"); 1854 } 1855 system_values.tess_inner = wrap(sys_value_inner_factors); 1856 1857 if (verbose_shader) 1858 { 1859 lp_build_print_value(gallivm, "tess_coord = ", system_values.tess_coord); 1860 } 1861 1862 struct tgsi_shader_info *pPrevShader = nullptr; 1863 1864 if (ctx->tcs) { 1865 pPrevShader = &ctx->tcs->info.base; 1866 } 1867 else { 1868 pPrevShader = &ctx->vs->info.base; 1869 } 1870 1871 // Figure out how 
many per-patch attributes we have 1872 unsigned perPatchAttrs = 0; 1873 unsigned genericAttrs = 0; 1874 unsigned tessLevelAttrs = 0; 1875 unsigned sgvAttrs = 0; 1876 for (unsigned slot = 0; slot < pPrevShader->num_outputs; slot++) { 1877 switch (pPrevShader->output_semantic_name[slot]) { 1878 case TGSI_SEMANTIC_PATCH: 1879 perPatchAttrs++; 1880 break; 1881 case TGSI_SEMANTIC_GENERIC: 1882 genericAttrs++; 1883 break; 1884 case TGSI_SEMANTIC_TESSINNER: 1885 case TGSI_SEMANTIC_TESSOUTER: 1886 tessLevelAttrs++; 1887 break; 1888 case TGSI_SEMANTIC_POSITION: 1889 case TGSI_SEMANTIC_CLIPDIST: 1890 case TGSI_SEMANTIC_PSIZE: 1891 sgvAttrs++; 1892 break; 1893 default: 1894 assert(!"Unknown semantic input in TES"); 1895 } 1896 } 1897 1898 std::vector<Constant *> mapConstants; 1899 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 1900 Value *patchAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 1901 for (unsigned slot = 0; slot < info->num_inputs; slot++) { 1902 ubyte semantic_name = info->input_semantic_name[slot]; 1903 ubyte semantic_idx = info->input_semantic_index[slot]; 1904 1905 // Where in TCS output is my attribute? 
1906 // TESS_TODO: revisit after implement pass-through TCS 1907 unsigned tcs_slot = locate_linkage(semantic_name, semantic_idx, pPrevShader); 1908 assert(tcs_slot < PIPE_MAX_SHADER_OUTPUTS); 1909 1910 // Skip tessellation levels - these go to the tessellator, not TES 1911 switch (semantic_name) { 1912 case TGSI_SEMANTIC_GENERIC: 1913 tcs_slot = tcs_slot + VERTEX_ATTRIB_START_SLOT - sgvAttrs - tessLevelAttrs; 1914 break; 1915 case TGSI_SEMANTIC_PATCH: 1916 tcs_slot = semantic_idx; 1917 break; 1918 case TGSI_SEMANTIC_POSITION: 1919 tcs_slot = VERTEX_POSITION_SLOT; 1920 break; 1921 case TGSI_SEMANTIC_CLIPDIST: 1922 case TGSI_SEMANTIC_PSIZE: 1923 break; 1924 default: 1925 assert(!"Unexpected semantic found while building TES input map"); 1926 } 1927 if (semantic_name == TGSI_SEMANTIC_PATCH) { 1928 STORE(C(tcs_slot), patchAttribMap, {0, slot}); 1929 } else { 1930 STORE(C(tcs_slot), vtxAttribMap, {0, slot}); 1931 } 1932 mapConstants.push_back(C(tcs_slot)); 1933 } 1934 1935 // Build execution mask 1936 struct lp_build_mask_context mask; 1937 Value *mask_val = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_mask}, "tesMask"); 1938 1939 if (verbose_shader) 1940 lp_build_print_value(gallivm, "TES execution mask: ", wrap(mask_val)); 1941 1942 lp_build_mask_begin(&mask, gallivm, 1943 lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 1944 1945 struct swr_tes_llvm_iface tes_iface; 1946 1947 tes_iface.base.fetch_vertex_input = ::swr_tes_llvm_fetch_vtx_input; 1948 tes_iface.base.fetch_patch_input = ::swr_tes_llvm_fetch_patch_input; 1949 1950 tes_iface.pBuilder = this; 1951 tes_iface.pTesCtx = pTesCtx; 1952 tes_iface.pTsState = pTS; 1953 tes_iface.num_outputs = tes->info.base.num_outputs; 1954 tes_iface.info = info; 1955 tes_iface.pVtxAttribMap = vtxAttribMap; 1956 tes_iface.pPatchAttribMap = patchAttribMap; 1957 1958 struct lp_build_tgsi_params params; 1959 memset(¶ms, 0, sizeof(params)); 1960 params.type = lp_type_float_vec(32, 32 * 8); 1961 params.mask = & mask; 1962 params.consts_ptr = 
wrap(consts_ptr); 1963 params.const_sizes_ptr = wrap(const_sizes_ptr); 1964 params.system_values = &system_values; 1965 params.inputs = inputs; 1966 params.context_ptr = wrap(hPrivateData); 1967 params.sampler = sampler; 1968 params.info = &tes->info.base; 1969 params.tes_iface = &tes_iface.base; 1970 1971 // Build LLVM IR 1972 lp_build_tgsi_soa(gallivm, 1973 tes->pipe.tokens, 1974 ¶ms, 1975 outputs); 1976 1977 lp_build_mask_end(&mask); 1978 1979 sampler->destroy(sampler); 1980 1981 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1982 1983 // Write output attributes 1984 Value *dclOut = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pOutputData}, "dclOut"); 1985 1986 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { 1987 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 1988 if (!outputs[attrib][channel]) 1989 continue; 1990 1991 Value *val = LOAD(unwrap(outputs[attrib][channel]));; 1992 Value *attribOffset = 1993 LOAD(pTesCtx, {0, SWR_DS_CONTEXT_outVertexAttribOffset}); 1994 1995 // Assume we write possition 1996 Value* outputSlot = C(VERTEX_POSITION_SLOT); 1997 if (tes->info.base.output_semantic_name[attrib] != TGSI_SEMANTIC_POSITION) { 1998 // No, it's a generic attribute, not a position - let's calculate output slot 1999 uint32_t outSlot = attrib; 2000 if (tes->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 2001 // this shader will write position, so in shader's term 2002 // output starts at attrib 1, but we will handle that separately, 2003 // so let's fix the outSlot 2004 outSlot--; 2005 } 2006 outputSlot = ADD(attribOffset, C(outSlot)); 2007 } 2008 2009 Value *attribVecIndex = 2010 ADD(MUL(vecStride, MUL(outputSlot, C(4))), vecOffset); 2011 2012 uint32_t outputComponent = 0; 2013 uint32_t curComp = outputComponent + channel; 2014 auto outValIndex = ADD(attribVecIndex, MUL(vecStride, C(curComp))); 2015 STOREV(val, dclOut, {outValIndex}); 2016 2017 if (verbose_shader) { 2018 
lp_build_printf(gallivm, 2019 "TES output [%d][%d]", 2020 C(attrib), 2021 C(channel)); 2022 lp_build_print_value(gallivm, " = ", wrap(val)); 2023 } 2024 } 2025 } 2026 2027 RET_VOID(); 2028 2029 JM()->DumpToFile(pFunction, "src"); 2030 gallivm_verify_function(gallivm, wrap(pFunction)); 2031 2032 gallivm_compile_module(gallivm); 2033 JM()->DumpToFile(pFunction, "optimized"); 2034 2035 PFN_TES_FUNC pFunc = 2036 (PFN_TES_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 2037 2038 debug_printf("tess evaluation shader %p\n", pFunc); 2039 assert(pFunc && "Error: TessEvaluationShader = NULL"); 2040 2041 JM()->DumpAsm(pFunction, "asm"); 2042 2043 JM()->mIsModuleFinalized = true; 2044 2045 return pFunc; 2046} 2047 2048PFN_TCS_FUNC 2049BuilderSWR::CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key) 2050{ 2051 SWR_TS_STATE *pTS = &ctx->tsState; 2052 struct tgsi_shader_info *info = &ctx->tcs->info.base; 2053 2054 pTS->numHsInputAttribs = info->num_inputs; 2055 pTS->numHsOutputAttribs = info->num_outputs; 2056 2057 pTS->hsAllocationSize = sizeof(ScalarPatch); 2058 2059 pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 2060 pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 2061 2062 struct swr_tess_control_shader *tcs = ctx->tcs; 2063 2064 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 2065 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 2066 2067 memset(outputs, 0, sizeof(outputs)); 2068 2069 AttrBuilder attrBuilder; 2070 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 2071 2072 std::vector<Type *> tcsArgs{ 2073 PointerType::get(Gen_swr_draw_context(JM()), 0), 2074 PointerType::get(mInt8Ty, 0), 2075 PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)}; 2076 FunctionType *tcsFuncType = 2077 FunctionType::get(Type::getVoidTy(JM()->mContext), tcsArgs, false); 2078 2079 // create new vertex shader function 2080 auto pFunction = Function::Create(tcsFuncType, 2081 GlobalValue::ExternalLinkage, 2082 "TCS", 2083 
JM()->mpCurrentModule); 2084 2085#if LLVM_VERSION_MAJOR < 5 2086 AttributeSet attrSet = AttributeSet::get( 2087 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 2088 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 2089#else 2090 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 2091#endif 2092 2093 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 2094 IRB()->SetInsertPoint(block); 2095 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 2096 2097 auto argitr = pFunction->arg_begin(); 2098 Value *hPrivateData = &*argitr++; 2099 hPrivateData->setName("hPrivateData"); 2100 Value *pWorkerData = &*argitr++; 2101 pWorkerData->setName("pWorkerData"); 2102 Value *pTcsCtx = &*argitr++; 2103 pTcsCtx->setName("tcsCtx"); 2104 2105 Value *consts_ptr = 2106 GEP(hPrivateData, {C(0), C(swr_draw_context_constantTCS)}); 2107 consts_ptr->setName("tcs_constants"); 2108 Value *const_sizes_ptr = 2109 GEP(hPrivateData, {0, swr_draw_context_num_constantsTCS}); 2110 const_sizes_ptr->setName("num_tcs_constants"); 2111 2112 struct lp_build_sampler_soa *sampler = 2113 swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_CTRL); 2114 assert(sampler != nullptr); 2115 2116 struct lp_bld_tgsi_system_values system_values; 2117 memset(&system_values, 0, sizeof(system_values)); 2118 2119 system_values.prim_id = 2120 wrap(LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_PrimitiveID})); 2121 2122 system_values.invocation_id = wrap(VBROADCAST(C(0))); 2123 system_values.vertices_in = wrap(C(tcs->vertices_per_patch)); 2124 2125 if (verbose_shader) { 2126 lp_build_print_value(gallivm, "TCS::prim_id = ", system_values.prim_id); 2127 lp_build_print_value(gallivm, "TCS::invocation_id = ", system_values.invocation_id); 2128 lp_build_print_value(gallivm, "TCS::vertices_in = ", system_values.vertices_in); 2129 } 2130 2131 std::vector<Constant *> mapConstants; 2132 Value *vtxAttribMap = 2133 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 2134 
2135 for (unsigned slot = 0; slot < info->num_inputs; slot++) { 2136 ubyte semantic_name = info->input_semantic_name[slot]; 2137 ubyte semantic_idx = info->input_semantic_index[slot]; 2138 2139 unsigned vs_slot = 2140 locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); 2141 assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS); 2142 2143 vs_slot += VERTEX_ATTRIB_START_SLOT; 2144 2145 if (ctx->vs->info.base.output_semantic_name[0] 2146 == TGSI_SEMANTIC_POSITION) 2147 vs_slot--; 2148 2149 if (semantic_name == TGSI_SEMANTIC_POSITION) 2150 vs_slot = VERTEX_POSITION_SLOT; 2151 2152 STORE(C(vs_slot), vtxAttribMap, {0, slot}); 2153 mapConstants.push_back(C(vs_slot)); 2154 } 2155 2156 // Prepare map of output attributes. Needed when shader instance wants 2157 // to read own output or output of other instance, which is allowed in TCS 2158 Value *vtxOutputAttribMap = 2159 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 2160 // Map for per-patch attributes 2161 Value *patchOutputAttribMap = 2162 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 2163 for (unsigned slot = 0; slot < info->num_outputs; slot++) { 2164 ubyte name = info->output_semantic_name[slot]; 2165 int32_t idx = info->output_semantic_index[slot]; 2166 if (name == TGSI_SEMANTIC_PATCH) { 2167 STORE(C(idx), patchOutputAttribMap, {0, slot}); 2168 } else { 2169 int32_t target_slot = slot; 2170 if (name == TGSI_SEMANTIC_GENERIC) { 2171 target_slot += VERTEX_ATTRIB_START_SLOT; 2172 } 2173 // Now normalize target slot 2174 for (ubyte as = 0; as < slot; as++) { 2175 ubyte name = info->output_semantic_name[as]; 2176 switch (name) { 2177 case TGSI_SEMANTIC_TESSOUTER: 2178 case TGSI_SEMANTIC_TESSINNER: 2179 case TGSI_SEMANTIC_PATCH: 2180 case TGSI_SEMANTIC_POSITION: 2181 target_slot--; 2182 } 2183 } 2184 if (name == TGSI_SEMANTIC_POSITION) { 2185 target_slot = VERTEX_POSITION_SLOT; 2186 } 2187 STORE(C(target_slot), vtxOutputAttribMap, {0, slot}); 2188 mapConstants.push_back(C(target_slot)); 2189 } 2190 
} 2191 2192 struct lp_build_mask_context mask; 2193 Value *mask_val = LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_mask}, "tcsMask"); 2194 lp_build_mask_begin( 2195 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 2196 2197 struct swr_tcs_llvm_iface tcs_iface; 2198 2199 tcs_iface.base.emit_store_output = ::swr_tcs_llvm_store_output; 2200 tcs_iface.base.emit_fetch_input = ::swr_tcs_llvm_fetch_input; 2201 tcs_iface.base.emit_fetch_output = ::swr_tcs_llvm_fetch_output; 2202 tcs_iface.base.emit_barrier = ::swr_tcs_llvm_emit_barrier; 2203 tcs_iface.base.emit_prologue = ::swr_tcs_llvm_emit_prologue; 2204 tcs_iface.base.emit_epilogue = ::swr_tcs_llvm_emit_epilogue; 2205 2206 tcs_iface.pBuilder = this; 2207 tcs_iface.pTcsCtx = pTcsCtx; 2208 tcs_iface.pTsState = pTS; 2209 tcs_iface.output_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; 2210 tcs_iface.info = info; 2211 tcs_iface.pVtxAttribMap = vtxAttribMap; 2212 tcs_iface.pVtxOutputAttribMap = vtxOutputAttribMap; 2213 tcs_iface.pPatchOutputAttribMap = patchOutputAttribMap; 2214 2215 struct lp_build_tgsi_params params; 2216 memset(¶ms, 0, sizeof(params)); 2217 params.type = lp_type_float_vec(32, 32 * 8); 2218 params.mask = &mask; 2219 params.consts_ptr = wrap(consts_ptr); 2220 params.const_sizes_ptr = wrap(const_sizes_ptr); 2221 params.system_values = &system_values; 2222 params.inputs = inputs; 2223 params.context_ptr = wrap(hPrivateData); 2224 params.sampler = sampler; 2225 params.info = &tcs->info.base; 2226 params.tcs_iface = &tcs_iface.base; 2227 2228 lp_build_tgsi_soa(gallivm, tcs->pipe.tokens, ¶ms, outputs); 2229 2230 lp_build_mask_end(&mask); 2231 2232 sampler->destroy(sampler); 2233 2234 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 2235 RET_VOID(); 2236 2237 JM()->DumpToFile(pFunction, "src"); 2238 gallivm_verify_function(gallivm, wrap(pFunction)); 2239 gallivm_compile_module(gallivm); 2240 JM()->DumpToFile(pFunction, "optimized"); 2241 2242 PFN_TCS_FUNC pFunc = 2243 
(PFN_TCS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 2244 2245 debug_printf("tess control shader %p\n", pFunc); 2246 assert(pFunc && "Error: TessControlShader = NULL"); 2247 JM()->DumpAsm(pFunction, "asm"); 2248 2249 JM()->mIsModuleFinalized = true; 2250 2251 return pFunc; 2252} 2253 2254 2255PFN_GS_FUNC 2256swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key) 2257{ 2258 BuilderSWR builder( 2259 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 2260 "GS"); 2261 PFN_GS_FUNC func = builder.CompileGS(ctx, key); 2262 2263 ctx->gs->map.insert(std::make_pair(key, std::unique_ptr<VariantGS>(new VariantGS(builder.gallivm, func)))); 2264 return func; 2265} 2266 2267PFN_TCS_FUNC 2268swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key) 2269{ 2270 BuilderSWR builder( 2271 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 2272 "TCS"); 2273 PFN_TCS_FUNC func = builder.CompileTCS(ctx, key); 2274 2275 ctx->tcs->map.insert( 2276 std::make_pair(key, std::unique_ptr<VariantTCS>(new VariantTCS(builder.gallivm, func)))); 2277 2278 return func; 2279} 2280 2281PFN_TES_FUNC 2282swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key) 2283{ 2284 BuilderSWR builder( 2285 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 2286 "TES"); 2287 PFN_TES_FUNC func = builder.CompileTES(ctx, key); 2288 2289 ctx->tes->map.insert( 2290 std::make_pair(key, std::unique_ptr<VariantTES>(new VariantTES(builder.gallivm, func)))); 2291 2292 return func; 2293} 2294 2295void 2296BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel) 2297{ 2298#if USE_SIMD16_FRONTEND && !USE_SIMD16_VS 2299 // interleave the simdvertex components into the dest simd16vertex 2300 // slot16offset = slot8offset * 2 2301 // comp16offset = comp8offset * 2 + alternateOffset 2302 2303 Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset }); 2304 Value *pOut = GEP(pVtxOutput, { 
C(0), C(0), C(slot * 2), offset } ); 2305 STORE(pVal, pOut, {channel * 2}); 2306#else 2307 Value *pOut = GEP(pVtxOutput, {0, 0, slot}); 2308 STORE(pVal, pOut, {0, channel}); 2309 if (verbose_vs_shader) { 2310 lp_build_printf(gallivm, "VS: Storing on slot %d, channel %d: ", C(slot), C(channel)); 2311 lp_build_print_value(gallivm, "", wrap(pVal)); 2312 } 2313#endif 2314} 2315 2316PFN_VERTEX_FUNC 2317BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) 2318{ 2319 struct swr_vertex_shader *swr_vs = ctx->vs; 2320 2321 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 2322 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 2323 2324 memset(outputs, 0, sizeof(outputs)); 2325 2326 AttrBuilder attrBuilder; 2327 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 2328 2329 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 2330 PointerType::get(mInt8Ty, 0), 2331 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; 2332 FunctionType *vsFuncType = 2333 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); 2334 2335 // create new vertex shader function 2336 auto pFunction = Function::Create(vsFuncType, 2337 GlobalValue::ExternalLinkage, 2338 "VS", 2339 JM()->mpCurrentModule); 2340#if LLVM_VERSION_MAJOR < 5 2341 AttributeSet attrSet = AttributeSet::get( 2342 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 2343 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 2344#else 2345 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 2346#endif 2347 2348 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 2349 IRB()->SetInsertPoint(block); 2350 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 2351 2352 auto argitr = pFunction->arg_begin(); 2353 Value *hPrivateData = &*argitr++; 2354 hPrivateData->setName("hPrivateData"); 2355 Value *pWorkerData = &*argitr++; 2356 pWorkerData->setName("pWorkerData"); 2357 Value *pVsCtx = 
&*argitr++; 2358 pVsCtx->setName("vsCtx"); 2359 2360 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)}); 2361 2362 consts_ptr->setName("vs_constants"); 2363 Value *const_sizes_ptr = 2364 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS}); 2365 const_sizes_ptr->setName("num_vs_constants"); 2366 2367 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); 2368#if USE_SIMD16_VS 2369 vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0)); 2370#endif 2371 2372 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 2373 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; 2374 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 2375 if (mask & (1 << channel)) { 2376 inputs[attrib][channel] = 2377 wrap(LOAD(vtxInput, {0, 0, attrib, channel})); 2378 } 2379 } 2380 } 2381 2382 struct lp_build_sampler_soa *sampler = 2383 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX); 2384 assert(sampler != nullptr); 2385 2386 struct lp_bld_tgsi_system_values system_values; 2387 memset(&system_values, 0, sizeof(system_values)); 2388 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); 2389 2390#if USE_SIMD16_VS 2391 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16})); 2392#else 2393 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID})); 2394#endif 2395 2396#if USE_SIMD16_VS 2397 uint32_t vectorWidth = mVWidth16; 2398#else 2399 uint32_t vectorWidth = mVWidth; 2400#endif 2401 2402 struct lp_build_tgsi_params params; 2403 memset(¶ms, 0, sizeof(params)); 2404 params.type = lp_type_float_vec(32, 32 * vectorWidth); 2405 params.consts_ptr = wrap(consts_ptr); 2406 params.const_sizes_ptr = wrap(const_sizes_ptr); 2407 params.system_values = &system_values; 2408 params.inputs = inputs; 2409 params.context_ptr = wrap(hPrivateData); 2410 params.sampler = sampler; 2411 params.info = &swr_vs->info.base; 2412 2413 
lp_build_tgsi_soa(gallivm, 2414 swr_vs->pipe.tokens, 2415 ¶ms, 2416 outputs); 2417 2418 sampler->destroy(sampler); 2419 2420 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 2421 2422 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); 2423#if USE_SIMD16_VS 2424 vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0)); 2425#endif 2426 2427 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 2428 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { 2429 if (!outputs[attrib][channel]) 2430 continue; 2431 2432 Value *val; 2433 uint32_t outSlot; 2434 2435 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { 2436 if (channel != VERTEX_SGV_POINT_SIZE_COMP) 2437 continue; 2438 val = LOAD(unwrap(outputs[attrib][0])); 2439 outSlot = VERTEX_SGV_SLOT; 2440 } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { 2441 val = LOAD(unwrap(outputs[attrib][channel])); 2442 outSlot = VERTEX_POSITION_SLOT; 2443 } else { 2444 val = LOAD(unwrap(outputs[attrib][channel])); 2445 outSlot = VERTEX_ATTRIB_START_SLOT + attrib; 2446 if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) 2447 outSlot--; 2448 } 2449 2450 WriteVS(val, pVsCtx, vtxOutput, outSlot, channel); 2451 } 2452 } 2453 2454 if (ctx->rasterizer->clip_plane_enable || 2455 swr_vs->info.base.culldist_writemask) { 2456 unsigned clip_mask = ctx->rasterizer->clip_plane_enable; 2457 2458 unsigned cv = 0; 2459 if (swr_vs->info.base.writes_clipvertex) { 2460 cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0, 2461 &swr_vs->info.base); 2462 } else { 2463 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 2464 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 2465 swr_vs->info.base.output_semantic_index[i] == 0) { 2466 cv = i; 2467 break; 2468 } 2469 } 2470 } 2471 assert(cv < PIPE_MAX_SHADER_OUTPUTS); 2472 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, 
outputs[cv][0], ""); 2473 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], ""); 2474 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], ""); 2475 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], ""); 2476 2477 tgsi_shader_info *pLastFE = &ctx->vs->info.base; 2478 2479 if (ctx->gs) { 2480 pLastFE = &ctx->gs->info.base; 2481 } 2482 else if (ctx->tes) { 2483 pLastFE = &ctx->tes->info.base; 2484 } 2485 else if (ctx->tcs) { 2486 pLastFE = &ctx->tcs->info.base; 2487 } 2488 2489 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) { 2490 // clip distance overrides user clip planes 2491 if ((pLastFE->clipdist_writemask & clip_mask & (1 << val)) || 2492 ((pLastFE->culldist_writemask << pLastFE->num_written_clipdistance) & (1 << val))) { 2493 unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, pLastFE); 2494 assert(cv < PIPE_MAX_SHADER_OUTPUTS); 2495 if (val < 4) { 2496 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], ""); 2497 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val); 2498 } else { 2499 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], ""); 2500 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4); 2501 } 2502 continue; 2503 } 2504 2505 if (!(clip_mask & (1 << val))) 2506 continue; 2507 2508 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0})); 2509 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1})); 2510 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2})); 2511 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3})); 2512#if USE_SIMD16_VS 2513 Value *bpx = VBROADCAST_16(px); 2514 Value *bpy = VBROADCAST_16(py); 2515 Value *bpz = VBROADCAST_16(pz); 2516 Value *bpw = VBROADCAST_16(pw); 2517#else 2518 Value *bpx = VBROADCAST(px); 2519 Value *bpy = VBROADCAST(py); 2520 Value *bpz = 
VBROADCAST(pz); 2521 Value *bpw = VBROADCAST(pw); 2522#endif 2523 Value *dist = FADD(FMUL(unwrap(cx), bpx), 2524 FADD(FMUL(unwrap(cy), bpy), 2525 FADD(FMUL(unwrap(cz), bpz), 2526 FMUL(unwrap(cw), bpw)))); 2527 2528 if (val < 4) 2529 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val); 2530 else 2531 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4); 2532 } 2533 } 2534 2535 RET_VOID(); 2536 2537 JM()->DumpToFile(pFunction, "vs_function1"); 2538 gallivm_verify_function(gallivm, wrap(pFunction)); 2539 gallivm_compile_module(gallivm); 2540 JM()->DumpToFile(pFunction, "vs_function2"); 2541 2542 // lp_debug_dump_value(func); 2543 2544 PFN_VERTEX_FUNC pFunc = 2545 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 2546 2547 JM()->DumpAsm(pFunction, "vs_function_asm"); 2548 debug_printf("vert shader %p\n", pFunc); 2549 assert(pFunc && "Error: VertShader = NULL"); 2550 2551 JM()->mIsModuleFinalized = true; 2552 2553 return pFunc; 2554} 2555 2556PFN_VERTEX_FUNC 2557swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key) 2558{ 2559 if (!ctx->vs->pipe.tokens) 2560 return NULL; 2561 2562 BuilderSWR builder( 2563 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 2564 "VS"); 2565 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key); 2566 2567 ctx->vs->map.insert(std::make_pair(key, std::unique_ptr<VariantVS>(new VariantVS(builder.gallivm, func)))); 2568 return func; 2569} 2570 2571unsigned 2572swr_so_adjust_attrib(unsigned in_attrib, 2573 swr_vertex_shader *swr_vs) 2574{ 2575 ubyte semantic_name; 2576 unsigned attrib; 2577 2578 attrib = in_attrib + VERTEX_ATTRIB_START_SLOT; 2579 2580 if (swr_vs) { 2581 semantic_name = swr_vs->info.base.output_semantic_name[in_attrib]; 2582 if (semantic_name == TGSI_SEMANTIC_POSITION) { 2583 attrib = VERTEX_POSITION_SLOT; 2584 } else if (semantic_name == TGSI_SEMANTIC_PSIZE) { 2585 attrib = VERTEX_SGV_SLOT; 2586 } else if (semantic_name == TGSI_SEMANTIC_LAYER) { 2587 
attrib = VERTEX_SGV_SLOT; 2588 } else { 2589 if (swr_vs->info.base.writes_position) { 2590 attrib--; 2591 } 2592 } 2593 } 2594 2595 return attrib; 2596} 2597 2598static unsigned 2599locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) 2600{ 2601 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 2602 if ((info->output_semantic_name[i] == name) 2603 && (info->output_semantic_index[i] == index)) { 2604 return i; 2605 } 2606 } 2607 2608 return 0xFFFFFFFF; 2609} 2610 2611PFN_PIXEL_KERNEL 2612BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) 2613{ 2614 struct swr_fragment_shader *swr_fs = ctx->fs; 2615 2616 struct tgsi_shader_info *pPrevShader; 2617 if (ctx->gs) 2618 pPrevShader = &ctx->gs->info.base; 2619 else if (ctx->tes) 2620 pPrevShader = &ctx->tes->info.base; 2621 else 2622 pPrevShader = &ctx->vs->info.base; 2623 2624 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 2625 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 2626 2627 memset(inputs, 0, sizeof(inputs)); 2628 memset(outputs, 0, sizeof(outputs)); 2629 2630 struct lp_build_sampler_soa *sampler = NULL; 2631 2632 AttrBuilder attrBuilder; 2633 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 2634 2635 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 2636 PointerType::get(mInt8Ty, 0), 2637 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; 2638 FunctionType *funcType = 2639 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); 2640 2641 auto pFunction = Function::Create(funcType, 2642 GlobalValue::ExternalLinkage, 2643 "FS", 2644 JM()->mpCurrentModule); 2645#if LLVM_VERSION_MAJOR < 5 2646 AttributeSet attrSet = AttributeSet::get( 2647 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 2648 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 2649#else 2650 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 2651#endif 2652 2653 BasicBlock *block = 
BasicBlock::Create(JM()->mContext, "entry", pFunction); 2654 IRB()->SetInsertPoint(block); 2655 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 2656 2657 auto args = pFunction->arg_begin(); 2658 Value *hPrivateData = &*args++; 2659 hPrivateData->setName("hPrivateData"); 2660 Value *pWorkerData = &*args++; 2661 pWorkerData->setName("pWorkerData"); 2662 Value *pPS = &*args++; 2663 pPS->setName("psCtx"); 2664 2665 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS}); 2666 consts_ptr->setName("fs_constants"); 2667 Value *const_sizes_ptr = 2668 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS}); 2669 const_sizes_ptr->setName("num_fs_constants"); 2670 2671 // load *pAttribs, *pPerspAttribs 2672 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs"); 2673 Value *pPerspAttribs = 2674 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs"); 2675 2676 swr_fs->constantMask = 0; 2677 swr_fs->flatConstantMask = 0; 2678 swr_fs->pointSpriteMask = 0; 2679 2680 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 2681 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib]; 2682 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib]; 2683 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib]; 2684 2685 if (!mask) 2686 continue; 2687 2688 // load i,j 2689 Value *vi = nullptr, *vj = nullptr; 2690 switch (interpLoc) { 2691 case TGSI_INTERPOLATE_LOC_CENTER: 2692 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i"); 2693 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j"); 2694 break; 2695 case TGSI_INTERPOLATE_LOC_CENTROID: 2696 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i"); 2697 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j"); 2698 break; 2699 case TGSI_INTERPOLATE_LOC_SAMPLE: 2700 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i"); 2701 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, 
PixelPositions_sample}, "j"); 2702 break; 2703 } 2704 2705 // load/compute w 2706 Value *vw = nullptr, *pAttribs; 2707 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 2708 interpMode == TGSI_INTERPOLATE_COLOR) { 2709 pAttribs = pPerspAttribs; 2710 switch (interpLoc) { 2711 case TGSI_INTERPOLATE_LOC_CENTER: 2712 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center})); 2713 break; 2714 case TGSI_INTERPOLATE_LOC_CENTROID: 2715 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid})); 2716 break; 2717 case TGSI_INTERPOLATE_LOC_SAMPLE: 2718 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample})); 2719 break; 2720 } 2721 } else { 2722 pAttribs = pRawAttribs; 2723 vw = VIMMED1(1.f); 2724 } 2725 2726 vw->setName("w"); 2727 2728 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib]; 2729 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib]; 2730 2731 if (semantic_name == TGSI_SEMANTIC_FACE) { 2732 Value *ff = 2733 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty); 2734 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); 2735 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace"); 2736 2737 inputs[attrib][0] = wrap(ff); 2738 inputs[attrib][1] = wrap(VIMMED1(0.0f)); 2739 inputs[attrib][2] = wrap(VIMMED1(0.0f)); 2740 inputs[attrib][3] = wrap(VIMMED1(1.0f)); 2741 continue; 2742 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord 2743 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] == 2744 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) { 2745 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX")); 2746 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY")); 2747 } else { 2748 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX")); 2749 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY")); 2750 } 2751 inputs[attrib][2] = wrap(LOAD(pPS, 
{0, SWR_PS_CONTEXT_vZ}, "vZ")); 2752 inputs[attrib][3] = 2753 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW")); 2754 continue; 2755 } else if (semantic_name == TGSI_SEMANTIC_LAYER) { // gl_Layer 2756 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_renderTargetArrayIndex}); 2757 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vRenderTargetArrayIndex"); 2758 inputs[attrib][0] = wrap(ff); 2759 inputs[attrib][1] = wrap(VIMMED1(0.0f)); 2760 inputs[attrib][2] = wrap(VIMMED1(0.0f)); 2761 inputs[attrib][3] = wrap(VIMMED1(0.0f)); 2762 continue; 2763 } else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { // gl_ViewportIndex 2764 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_viewportIndex}); 2765 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vViewportIndex"); 2766 inputs[attrib][0] = wrap(ff); 2767 inputs[attrib][1] = wrap(VIMMED1(0.0f)); 2768 inputs[attrib][2] = wrap(VIMMED1(0.0f)); 2769 inputs[attrib][3] = wrap(VIMMED1(0.0f)); 2770 continue; 2771 } 2772 unsigned linkedAttrib = 2773 locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1; 2774 2775 uint32_t extraAttribs = 0; 2776 if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) { 2777 /* non-gs generated primID - need to grab from swizzleMap override */ 2778 linkedAttrib = pPrevShader->num_outputs - 1; 2779 swr_fs->constantMask |= 1 << linkedAttrib; 2780 extraAttribs++; 2781 } else if (semantic_name == TGSI_SEMANTIC_GENERIC && 2782 key.sprite_coord_enable & (1 << semantic_idx)) { 2783 /* we add an extra attrib to the backendState in swr_update_derived. 
*/ 2784 linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1; 2785 swr_fs->pointSpriteMask |= (1 << linkedAttrib); 2786 extraAttribs++; 2787 } else if (linkedAttrib + 1 == 0xFFFFFFFF) { 2788 inputs[attrib][0] = wrap(VIMMED1(0.0f)); 2789 inputs[attrib][1] = wrap(VIMMED1(0.0f)); 2790 inputs[attrib][2] = wrap(VIMMED1(0.0f)); 2791 inputs[attrib][3] = wrap(VIMMED1(1.0f)); 2792 /* If we're reading in color and 2-sided lighting is enabled, we have 2793 * to keep going. 2794 */ 2795 if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside) 2796 continue; 2797 } else { 2798 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 2799 swr_fs->constantMask |= 1 << linkedAttrib; 2800 } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 2801 swr_fs->flatConstantMask |= 1 << linkedAttrib; 2802 } 2803 } 2804 2805 unsigned bcolorAttrib = 0xFFFFFFFF; 2806 Value *offset = NULL; 2807 if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) { 2808 bcolorAttrib = locate_linkage( 2809 TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader); 2810 /* Neither front nor back colors were available. Nothing to load. */ 2811 if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF) 2812 continue; 2813 /* If there is no front color, just always use the back color. 
*/ 2814 if (linkedAttrib + 1 == 0xFFFFFFFF) 2815 linkedAttrib = bcolorAttrib; 2816 2817 if (bcolorAttrib != 0xFFFFFFFF) { 2818 bcolorAttrib -= 1; 2819 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 2820 swr_fs->constantMask |= 1 << bcolorAttrib; 2821 } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 2822 swr_fs->flatConstantMask |= 1 << bcolorAttrib; 2823 } 2824 2825 unsigned diff = 12 * (bcolorAttrib - linkedAttrib); 2826 2827 if (diff) { 2828 Value *back = 2829 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace"); 2830 2831 offset = MUL(back, C(diff)); 2832 offset->setName("offset"); 2833 } 2834 } 2835 } 2836 2837 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 2838 if (mask & (1 << channel)) { 2839 Value *indexA = C(linkedAttrib * 12 + channel); 2840 Value *indexB = C(linkedAttrib * 12 + channel + 4); 2841 Value *indexC = C(linkedAttrib * 12 + channel + 8); 2842 2843 if (offset) { 2844 indexA = ADD(indexA, offset); 2845 indexB = ADD(indexB, offset); 2846 indexC = ADD(indexC, offset); 2847 } 2848 2849 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA))); 2850 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB))); 2851 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC))); 2852 2853 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 2854 inputs[attrib][channel] = wrap(va); 2855 } else { 2856 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); 2857 2858 vc = FMUL(vk, vc); 2859 2860 Value *interp = FMUL(va, vi); 2861 Value *interp1 = FMUL(vb, vj); 2862 interp = FADD(interp, interp1); 2863 interp = FADD(interp, vc); 2864 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 2865 interpMode == TGSI_INTERPOLATE_COLOR) 2866 interp = FMUL(interp, vw); 2867 inputs[attrib][channel] = wrap(interp); 2868 } 2869 } 2870 } 2871 } 2872 2873 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT); 2874 assert(sampler != nullptr); 2875 2876 struct lp_bld_tgsi_system_values system_values; 2877 memset(&system_values, 0, sizeof(system_values)); 2878 2879 struct 
lp_build_mask_context mask; 2880 bool uses_mask = false; 2881 2882 if (swr_fs->info.base.uses_kill || 2883 key.poly_stipple_enable) { 2884 Value *vActiveMask = NULL; 2885 if (swr_fs->info.base.uses_kill) { 2886 vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask"); 2887 } 2888 if (key.poly_stipple_enable) { 2889 // first get fragment xy coords and clip to stipple bounds 2890 Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}); 2891 Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}); 2892 Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty); 2893 Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty); 2894 2895 // stipple pattern is 32x32, which means that one line of stipple 2896 // is stored in one word: 2897 // vXstipple is bit offset inside 32-bit stipple word 2898 // vYstipple is word index is stipple array 2899 Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1) 2900 Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1) 2901 2902 // grab stipple pattern base address 2903 Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0}); 2904 stipplePtr = BITCAST(stipplePtr, mInt8PtrTy); 2905 2906 // peform a gather to grab stipple words for each lane 2907 Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple, 2908 VIMMED1(0xffffffff), 4); 2909 2910 // create a mask with one bit corresponding to the x stipple 2911 // and AND it with the pattern, to see if we have a bit 2912 Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple); 2913 Value *vStippleMask = AND(vStipple, vBitMask); 2914 vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0)); 2915 vStippleMask = VMASK(vStippleMask); 2916 2917 if (swr_fs->info.base.uses_kill) { 2918 vActiveMask = AND(vActiveMask, vStippleMask); 2919 } else { 2920 vActiveMask = vStippleMask; 2921 } 2922 } 2923 lp_build_mask_begin( 2924 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask)); 2925 uses_mask = true; 2926 } 2927 2928 struct lp_build_tgsi_params params; 2929 memset(¶ms, 0, 
sizeof(params)); 2930 params.type = lp_type_float_vec(32, 32 * 8); 2931 params.mask = uses_mask ? &mask : NULL; 2932 params.consts_ptr = wrap(consts_ptr); 2933 params.const_sizes_ptr = wrap(const_sizes_ptr); 2934 params.system_values = &system_values; 2935 params.inputs = inputs; 2936 params.context_ptr = wrap(hPrivateData); 2937 params.sampler = sampler; 2938 params.info = &swr_fs->info.base; 2939 2940 lp_build_tgsi_soa(gallivm, 2941 swr_fs->pipe.tokens, 2942 ¶ms, 2943 outputs); 2944 2945 sampler->destroy(sampler); 2946 2947 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 2948 2949 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs; 2950 attrib++) { 2951 switch (swr_fs->info.base.output_semantic_name[attrib]) { 2952 case TGSI_SEMANTIC_POSITION: { 2953 // write z 2954 LLVMValueRef outZ = 2955 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); 2956 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ}); 2957 break; 2958 } 2959 case TGSI_SEMANTIC_COLOR: { 2960 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 2961 if (!outputs[attrib][channel]) 2962 continue; 2963 2964 LLVMValueRef out = 2965 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], ""); 2966 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 2967 swr_fs->info.base.output_semantic_index[attrib] == 0) { 2968 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { 2969 STORE(unwrap(out), 2970 pPS, 2971 {0, SWR_PS_CONTEXT_shaded, rt, channel}); 2972 } 2973 } else { 2974 STORE(unwrap(out), 2975 pPS, 2976 {0, 2977 SWR_PS_CONTEXT_shaded, 2978 swr_fs->info.base.output_semantic_index[attrib], 2979 channel}); 2980 } 2981 } 2982 break; 2983 } 2984 default: { 2985 fprintf(stderr, 2986 "unknown output from FS %s[%d]\n", 2987 tgsi_semantic_names[swr_fs->info.base 2988 .output_semantic_name[attrib]], 2989 swr_fs->info.base.output_semantic_index[attrib]); 2990 break; 2991 } 2992 } 2993 } 2994 2995 LLVMValueRef mask_result = 0; 2996 if 
(uses_mask) { 2997 mask_result = lp_build_mask_end(&mask); 2998 } 2999 3000 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 3001 3002 if (uses_mask) { 3003 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask}); 3004 } 3005 3006 RET_VOID(); 3007 3008 gallivm_verify_function(gallivm, wrap(pFunction)); 3009 3010 gallivm_compile_module(gallivm); 3011 3012 // after the gallivm passes, we have to lower the core's intrinsics 3013 llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule); 3014 lowerPass.add(createLowerX86Pass(this)); 3015 lowerPass.run(*pFunction); 3016 3017 PFN_PIXEL_KERNEL kernel = 3018 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction)); 3019 debug_printf("frag shader %p\n", kernel); 3020 assert(kernel && "Error: FragShader = NULL"); 3021 3022 JM()->mIsModuleFinalized = true; 3023 3024 return kernel; 3025} 3026 3027PFN_PIXEL_KERNEL 3028swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key) 3029{ 3030 if (!ctx->fs->pipe.tokens) 3031 return NULL; 3032 3033 BuilderSWR builder( 3034 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 3035 "FS"); 3036 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key); 3037 3038 ctx->fs->map.insert(std::make_pair(key, std::unique_ptr<VariantFS>(new VariantFS(builder.gallivm, func)))); 3039 return func; 3040} 3041