101e04c3fSmrg/**************************************************************************** 201e04c3fSmrg * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg ***************************************************************************/ 2301e04c3fSmrg 247ec681f3Smrg#include <llvm/Config/llvm-config.h> 257ec681f3Smrg 267ec681f3Smrg#if LLVM_VERSION_MAJOR < 7 2701e04c3fSmrg// llvm redefines DEBUG 2801e04c3fSmrg#pragma push_macro("DEBUG") 2901e04c3fSmrg#undef DEBUG 307ec681f3Smrg#endif 317ec681f3Smrg 3201e04c3fSmrg#include "JitManager.h" 3301e04c3fSmrg#include "llvm-c/Core.h" 3401e04c3fSmrg#include "llvm/Support/CBindingWrapping.h" 3501e04c3fSmrg#include "llvm/IR/LegacyPassManager.h" 367ec681f3Smrg 377ec681f3Smrg#if LLVM_VERSION_MAJOR < 7 3801e04c3fSmrg#pragma pop_macro("DEBUG") 397ec681f3Smrg#endif 4001e04c3fSmrg 4101e04c3fSmrg#include "state.h" 4201e04c3fSmrg#include "gen_state_llvm.h" 4301e04c3fSmrg#include "builder.h" 4401e04c3fSmrg#include "functionpasses/passes.h" 4501e04c3fSmrg 4601e04c3fSmrg#include "tgsi/tgsi_strings.h" 477ec681f3Smrg#include "util/format/u_format.h" 4801e04c3fSmrg#include "util/u_prim.h" 4901e04c3fSmrg#include "gallivm/lp_bld_init.h" 5001e04c3fSmrg#include "gallivm/lp_bld_flow.h" 5101e04c3fSmrg#include "gallivm/lp_bld_struct.h" 5201e04c3fSmrg#include "gallivm/lp_bld_tgsi.h" 537ec681f3Smrg#include "gallivm/lp_bld_const.h" 547ec681f3Smrg#include "gallivm/lp_bld_printf.h" 557ec681f3Smrg#include "gallivm/lp_bld_logic.h" 5601e04c3fSmrg 5701e04c3fSmrg#include "swr_context.h" 587ec681f3Smrg#include "gen_surf_state_llvm.h" 5901e04c3fSmrg#include "gen_swr_context_llvm.h" 6001e04c3fSmrg#include "swr_resource.h" 6101e04c3fSmrg#include "swr_state.h" 6201e04c3fSmrg#include "swr_screen.h" 6301e04c3fSmrg 647ec681f3Smrg 657ec681f3Smrg///////////////////////////////////////////////////////////////////////// 667ec681f3Smrg 677ec681f3Smrg#include <stdio.h> 687ec681f3Smrg#include <inttypes.h> 697ec681f3Smrg 707ec681f3Smrg#include "util/u_debug.h" 717ec681f3Smrg#include "util/u_memory.h" 727ec681f3Smrg#include "util/u_string.h" 737ec681f3Smrg 747ec681f3Smrg#include "gallivm/lp_bld_type.h" 757ec681f3Smrg 767ec681f3Smrg#if defined(DEBUG) && defined(SWR_VERBOSE_SHADER) 777ec681f3Smrgconstexpr bool verbose_shader = true; 787ec681f3Smrgconstexpr bool verbose_tcs_shader_in = true; 797ec681f3Smrgconstexpr bool verbose_tcs_shader_out = true; 807ec681f3Smrgconstexpr bool verbose_tcs_shader_loop = true; 817ec681f3Smrgconstexpr bool verbose_vs_shader = true; 827ec681f3Smrg#else 837ec681f3Smrgconstexpr bool verbose_shader = false; 847ec681f3Smrgconstexpr bool verbose_tcs_shader_in = false; 857ec681f3Smrgconstexpr bool verbose_tcs_shader_out = false; 867ec681f3Smrgconstexpr bool verbose_tcs_shader_loop = false; 877ec681f3Smrgconstexpr bool verbose_vs_shader = false; 887ec681f3Smrg#endif 897ec681f3Smrg 9001e04c3fSmrgusing namespace SwrJit; 9101e04c3fSmrg 9201e04c3fSmrgstatic unsigned 9301e04c3fSmrglocate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info); 9401e04c3fSmrg 9501e04c3fSmrgbool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs) 9601e04c3fSmrg{ 9701e04c3fSmrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 9801e04c3fSmrg} 9901e04c3fSmrg 10001e04c3fSmrgbool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs) 10101e04c3fSmrg{ 10201e04c3fSmrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 10301e04c3fSmrg} 10401e04c3fSmrg 10501e04c3fSmrgbool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs) 10601e04c3fSmrg{ 10701e04c3fSmrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 10801e04c3fSmrg} 10901e04c3fSmrg 11001e04c3fSmrgbool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs) 11101e04c3fSmrg{ 11201e04c3fSmrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 11301e04c3fSmrg} 11401e04c3fSmrg 1157ec681f3Smrgbool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs) 1167ec681f3Smrg{ 1177ec681f3Smrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 1187ec681f3Smrg} 1197ec681f3Smrg 1207ec681f3Smrgbool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs) 1217ec681f3Smrg{ 1227ec681f3Smrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 1237ec681f3Smrg} 1247ec681f3Smrg 1257ec681f3Smrg 12601e04c3fSmrgstatic void 12701e04c3fSmrgswr_generate_sampler_key(const struct lp_tgsi_info &info, 12801e04c3fSmrg struct swr_context *ctx, 12901e04c3fSmrg enum pipe_shader_type shader_type, 13001e04c3fSmrg struct swr_jit_sampler_key &key) 13101e04c3fSmrg{ 13201e04c3fSmrg key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1; 13301e04c3fSmrg 13401e04c3fSmrg for (unsigned i = 0; i < key.nr_samplers; i++) { 13501e04c3fSmrg if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 13601e04c3fSmrg lp_sampler_static_sampler_state( 13701e04c3fSmrg &key.sampler[i].sampler_state, 13801e04c3fSmrg ctx->samplers[shader_type][i]); 13901e04c3fSmrg } 14001e04c3fSmrg } 14101e04c3fSmrg 14201e04c3fSmrg /* 14301e04c3fSmrg * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes 14401e04c3fSmrg * are dx10-style? Can't really have mixed opcodes, at least not 14501e04c3fSmrg * if we want to skip the holes here (without rescanning tgsi). 14601e04c3fSmrg */ 14701e04c3fSmrg if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 14801e04c3fSmrg key.nr_sampler_views = 14901e04c3fSmrg info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 15001e04c3fSmrg for (unsigned i = 0; i < key.nr_sampler_views; i++) { 15101e04c3fSmrg if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) { 15201e04c3fSmrg const struct pipe_sampler_view *view = 15301e04c3fSmrg ctx->sampler_views[shader_type][i]; 15401e04c3fSmrg lp_sampler_static_texture_state( 15501e04c3fSmrg &key.sampler[i].texture_state, view); 15601e04c3fSmrg if (view) { 15701e04c3fSmrg struct swr_resource *swr_res = swr_resource(view->texture); 15801e04c3fSmrg const struct util_format_description *desc = 15901e04c3fSmrg util_format_description(view->format); 16001e04c3fSmrg if (swr_res->has_depth && swr_res->has_stencil && 16101e04c3fSmrg !util_format_has_depth(desc)) 16201e04c3fSmrg key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 16301e04c3fSmrg } 16401e04c3fSmrg } 16501e04c3fSmrg } 16601e04c3fSmrg } else { 16701e04c3fSmrg key.nr_sampler_views = key.nr_samplers; 16801e04c3fSmrg for (unsigned i = 0; i < key.nr_sampler_views; i++) { 16901e04c3fSmrg if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 17001e04c3fSmrg const struct pipe_sampler_view *view = 17101e04c3fSmrg ctx->sampler_views[shader_type][i]; 17201e04c3fSmrg lp_sampler_static_texture_state( 17301e04c3fSmrg &key.sampler[i].texture_state, view); 17401e04c3fSmrg if (view) { 17501e04c3fSmrg struct swr_resource *swr_res = swr_resource(view->texture); 17601e04c3fSmrg const struct util_format_description *desc = 17701e04c3fSmrg util_format_description(view->format); 17801e04c3fSmrg if (swr_res->has_depth && swr_res->has_stencil && 17901e04c3fSmrg !util_format_has_depth(desc)) 18001e04c3fSmrg key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 18101e04c3fSmrg } 18201e04c3fSmrg } 18301e04c3fSmrg } 18401e04c3fSmrg } 18501e04c3fSmrg} 18601e04c3fSmrg 18701e04c3fSmrgvoid 18801e04c3fSmrgswr_generate_fs_key(struct swr_jit_fs_key &key, 18901e04c3fSmrg struct swr_context *ctx, 19001e04c3fSmrg swr_fragment_shader *swr_fs) 19101e04c3fSmrg{ 1927ec681f3Smrg memset((void*)&key, 0, sizeof(key)); 19301e04c3fSmrg 19401e04c3fSmrg key.nr_cbufs = ctx->framebuffer.nr_cbufs; 19501e04c3fSmrg key.light_twoside = ctx->rasterizer->light_twoside; 19601e04c3fSmrg key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable; 19701e04c3fSmrg 19801e04c3fSmrg struct tgsi_shader_info *pPrevShader; 19901e04c3fSmrg if (ctx->gs) 20001e04c3fSmrg pPrevShader = &ctx->gs->info.base; 2017ec681f3Smrg else if (ctx->tes) 2027ec681f3Smrg pPrevShader = &ctx->tes->info.base; 20301e04c3fSmrg else 20401e04c3fSmrg pPrevShader = &ctx->vs->info.base; 20501e04c3fSmrg 20601e04c3fSmrg memcpy(&key.vs_output_semantic_name, 20701e04c3fSmrg &pPrevShader->output_semantic_name, 20801e04c3fSmrg sizeof(key.vs_output_semantic_name)); 20901e04c3fSmrg memcpy(&key.vs_output_semantic_idx, 21001e04c3fSmrg &pPrevShader->output_semantic_index, 21101e04c3fSmrg sizeof(key.vs_output_semantic_idx)); 21201e04c3fSmrg 21301e04c3fSmrg swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key); 21401e04c3fSmrg 21501e04c3fSmrg key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable && 21601e04c3fSmrg ctx->poly_stipple.prim_is_poly; 21701e04c3fSmrg} 21801e04c3fSmrg 21901e04c3fSmrgvoid 22001e04c3fSmrgswr_generate_vs_key(struct swr_jit_vs_key &key, 22101e04c3fSmrg struct swr_context *ctx, 22201e04c3fSmrg swr_vertex_shader *swr_vs) 22301e04c3fSmrg{ 2247ec681f3Smrg memset((void*)&key, 0, sizeof(key)); 22501e04c3fSmrg 22601e04c3fSmrg key.clip_plane_mask = 22701e04c3fSmrg swr_vs->info.base.clipdist_writemask ? 22801e04c3fSmrg swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 22901e04c3fSmrg ctx->rasterizer->clip_plane_enable; 23001e04c3fSmrg 23101e04c3fSmrg swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key); 23201e04c3fSmrg} 23301e04c3fSmrg 23401e04c3fSmrgvoid 23501e04c3fSmrgswr_generate_fetch_key(struct swr_jit_fetch_key &key, 23601e04c3fSmrg struct swr_vertex_element_state *velems) 23701e04c3fSmrg{ 2387ec681f3Smrg memset((void*)&key, 0, sizeof(key)); 23901e04c3fSmrg 24001e04c3fSmrg key.fsState = velems->fsState; 24101e04c3fSmrg} 24201e04c3fSmrg 24301e04c3fSmrgvoid 24401e04c3fSmrgswr_generate_gs_key(struct swr_jit_gs_key &key, 24501e04c3fSmrg struct swr_context *ctx, 24601e04c3fSmrg swr_geometry_shader *swr_gs) 24701e04c3fSmrg{ 2487ec681f3Smrg memset((void*)&key, 0, sizeof(key)); 24901e04c3fSmrg 2507ec681f3Smrg struct tgsi_shader_info *pPrevShader = nullptr; 2517ec681f3Smrg 2527ec681f3Smrg if (ctx->tes) { 2537ec681f3Smrg pPrevShader = &ctx->tes->info.base; 2547ec681f3Smrg } else { 2557ec681f3Smrg pPrevShader = &ctx->vs->info.base; 2567ec681f3Smrg } 25701e04c3fSmrg 25801e04c3fSmrg memcpy(&key.vs_output_semantic_name, 25901e04c3fSmrg &pPrevShader->output_semantic_name, 26001e04c3fSmrg sizeof(key.vs_output_semantic_name)); 26101e04c3fSmrg memcpy(&key.vs_output_semantic_idx, 26201e04c3fSmrg &pPrevShader->output_semantic_index, 26301e04c3fSmrg sizeof(key.vs_output_semantic_idx)); 26401e04c3fSmrg 26501e04c3fSmrg swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key); 26601e04c3fSmrg} 26701e04c3fSmrg 2687ec681f3Smrgvoid 2697ec681f3Smrgswr_generate_tcs_key(struct swr_jit_tcs_key &key, 2707ec681f3Smrg struct swr_context *ctx, 2717ec681f3Smrg swr_tess_control_shader *swr_tcs) 2727ec681f3Smrg{ 2737ec681f3Smrg memset((void*)&key, 0, sizeof(key)); 2747ec681f3Smrg 2757ec681f3Smrg struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base; 2767ec681f3Smrg 2777ec681f3Smrg memcpy(&key.vs_output_semantic_name, 2787ec681f3Smrg &pPrevShader->output_semantic_name, 2797ec681f3Smrg sizeof(key.vs_output_semantic_name)); 2807ec681f3Smrg memcpy(&key.vs_output_semantic_idx, 2817ec681f3Smrg &pPrevShader->output_semantic_index, 2827ec681f3Smrg sizeof(key.vs_output_semantic_idx)); 2837ec681f3Smrg 2847ec681f3Smrg key.clip_plane_mask = 2857ec681f3Smrg swr_tcs->info.base.clipdist_writemask ? 2867ec681f3Smrg swr_tcs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 2877ec681f3Smrg ctx->rasterizer->clip_plane_enable; 2887ec681f3Smrg 2897ec681f3Smrg swr_generate_sampler_key(swr_tcs->info, ctx, PIPE_SHADER_TESS_CTRL, key); 2907ec681f3Smrg} 2917ec681f3Smrg 2927ec681f3Smrgvoid 2937ec681f3Smrgswr_generate_tes_key(struct swr_jit_tes_key &key, 2947ec681f3Smrg struct swr_context *ctx, 2957ec681f3Smrg swr_tess_evaluation_shader *swr_tes) 2967ec681f3Smrg{ 2977ec681f3Smrg memset((void*)&key, 0, sizeof(key)); 2987ec681f3Smrg 2997ec681f3Smrg struct tgsi_shader_info *pPrevShader = nullptr; 3007ec681f3Smrg 3017ec681f3Smrg if (ctx->tcs) { 3027ec681f3Smrg pPrevShader = &ctx->tcs->info.base; 3037ec681f3Smrg } 3047ec681f3Smrg else { 3057ec681f3Smrg pPrevShader = &ctx->vs->info.base; 3067ec681f3Smrg } 3077ec681f3Smrg 3087ec681f3Smrg SWR_ASSERT(pPrevShader != nullptr, "TES: No TCS or VS defined"); 3097ec681f3Smrg 3107ec681f3Smrg memcpy(&key.prev_output_semantic_name, 3117ec681f3Smrg &pPrevShader->output_semantic_name, 3127ec681f3Smrg sizeof(key.prev_output_semantic_name)); 3137ec681f3Smrg memcpy(&key.prev_output_semantic_idx, 3147ec681f3Smrg &pPrevShader->output_semantic_index, 3157ec681f3Smrg sizeof(key.prev_output_semantic_idx)); 3167ec681f3Smrg 3177ec681f3Smrg key.clip_plane_mask = 3187ec681f3Smrg swr_tes->info.base.clipdist_writemask ? 3197ec681f3Smrg swr_tes->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 3207ec681f3Smrg ctx->rasterizer->clip_plane_enable; 3217ec681f3Smrg 3227ec681f3Smrg swr_generate_sampler_key(swr_tes->info, ctx, PIPE_SHADER_TESS_EVAL, key); 3237ec681f3Smrg} 3247ec681f3Smrg 32501e04c3fSmrgstruct BuilderSWR : public Builder { 32601e04c3fSmrg BuilderSWR(JitManager *pJitMgr, const char *pName) 32701e04c3fSmrg : Builder(pJitMgr) 32801e04c3fSmrg { 32901e04c3fSmrg pJitMgr->SetupNewModule(); 3307ec681f3Smrg gallivm = gallivm_create(pName, wrap(&JM()->mContext), NULL); 33101e04c3fSmrg pJitMgr->mpCurrentModule = unwrap(gallivm->module); 33201e04c3fSmrg } 33301e04c3fSmrg 33401e04c3fSmrg ~BuilderSWR() { 33501e04c3fSmrg gallivm_free_ir(gallivm); 33601e04c3fSmrg } 33701e04c3fSmrg 33801e04c3fSmrg void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, 33901e04c3fSmrg unsigned slot, unsigned channel); 34001e04c3fSmrg 34101e04c3fSmrg struct gallivm_state *gallivm; 34201e04c3fSmrg PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key); 34301e04c3fSmrg PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key); 34401e04c3fSmrg PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key); 3457ec681f3Smrg PFN_TCS_FUNC CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key); 3467ec681f3Smrg PFN_TES_FUNC CompileTES(struct swr_context *ctx, swr_jit_tes_key &key); 34701e04c3fSmrg 3487ec681f3Smrg // GS-specific emit functions 34901e04c3fSmrg LLVMValueRef 3507ec681f3Smrg swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface, 3517ec681f3Smrg struct lp_build_context * bld, 35201e04c3fSmrg boolean is_vindex_indirect, 35301e04c3fSmrg LLVMValueRef vertex_index, 35401e04c3fSmrg boolean is_aindex_indirect, 35501e04c3fSmrg LLVMValueRef attrib_index, 35601e04c3fSmrg LLVMValueRef swizzle_index); 35701e04c3fSmrg void 3587ec681f3Smrg swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, 3597ec681f3Smrg struct lp_build_context * bld, 36001e04c3fSmrg LLVMValueRef (*outputs)[4], 3617ec681f3Smrg LLVMValueRef emitted_vertices_vec, 3627ec681f3Smrg LLVMValueRef stream_id); 36301e04c3fSmrg 36401e04c3fSmrg void 3657ec681f3Smrg swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, 3667ec681f3Smrg struct lp_build_context * bld, 3677ec681f3Smrg LLVMValueRef total_emitted_vertices_vec_ptr, 36801e04c3fSmrg LLVMValueRef verts_per_prim_vec, 3697ec681f3Smrg LLVMValueRef emitted_prims_vec, 3707ec681f3Smrg LLVMValueRef mask_vec); 37101e04c3fSmrg 37201e04c3fSmrg void 3737ec681f3Smrg swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base, 37401e04c3fSmrg LLVMValueRef total_emitted_vertices_vec, 3757ec681f3Smrg LLVMValueRef emitted_prims_vec, unsigned stream); 3767ec681f3Smrg 3777ec681f3Smrg // TCS-specific emit functions 3787ec681f3Smrg void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld); 3797ec681f3Smrg void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld); 3807ec681f3Smrg 3817ec681f3Smrg LLVMValueRef 3827ec681f3Smrg swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface, 3837ec681f3Smrg struct lp_build_tgsi_context * bld_base, 3847ec681f3Smrg boolean is_vindex_indirect, 3857ec681f3Smrg LLVMValueRef vertex_index, 3867ec681f3Smrg boolean is_aindex_indirect, 3877ec681f3Smrg LLVMValueRef attrib_index, 3887ec681f3Smrg LLVMValueRef swizzle_index); 3897ec681f3Smrg 3907ec681f3Smrg LLVMValueRef 3917ec681f3Smrg swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface, 3927ec681f3Smrg struct lp_build_tgsi_context * bld_base, 3937ec681f3Smrg boolean is_vindex_indirect, 3947ec681f3Smrg LLVMValueRef vertex_index, 3957ec681f3Smrg boolean is_aindex_indirect, 3967ec681f3Smrg LLVMValueRef attrib_index, 3977ec681f3Smrg LLVMValueRef swizzle_index, 3987ec681f3Smrg uint32_t name); 3997ec681f3Smrg 4007ec681f3Smrg void 4017ec681f3Smrg swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface, 4027ec681f3Smrg struct lp_build_tgsi_context * bld_base, 4037ec681f3Smrg unsigned name, 4047ec681f3Smrg boolean is_vindex_indirect, 4057ec681f3Smrg LLVMValueRef vertex_index, 4067ec681f3Smrg boolean is_aindex_indirect, 4077ec681f3Smrg LLVMValueRef attrib_index, 4087ec681f3Smrg LLVMValueRef swizzle_index, 4097ec681f3Smrg LLVMValueRef value, 4107ec681f3Smrg LLVMValueRef mask_vec); 4117ec681f3Smrg 4127ec681f3Smrg // Barrier implementation (available only in TCS) 4137ec681f3Smrg void 4147ec681f3Smrg swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface, 4157ec681f3Smrg struct lp_build_tgsi_context *bld_base); 4167ec681f3Smrg 4177ec681f3Smrg // TES-specific emit functions 4187ec681f3Smrg LLVMValueRef 4197ec681f3Smrg swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface, 4207ec681f3Smrg struct lp_build_tgsi_context * bld_base, 4217ec681f3Smrg boolean is_vindex_indirect, 4227ec681f3Smrg LLVMValueRef vertex_index, 4237ec681f3Smrg boolean is_aindex_indirect, 4247ec681f3Smrg LLVMValueRef attrib_index, 4257ec681f3Smrg LLVMValueRef swizzle_index); 42601e04c3fSmrg 4277ec681f3Smrg LLVMValueRef 4287ec681f3Smrg swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface, 4297ec681f3Smrg struct lp_build_tgsi_context * bld_base, 4307ec681f3Smrg boolean is_aindex_indirect, 4317ec681f3Smrg LLVMValueRef attrib_index, 4327ec681f3Smrg LLVMValueRef swizzle_index); 43301e04c3fSmrg}; 43401e04c3fSmrg 43501e04c3fSmrgstruct swr_gs_llvm_iface { 4367ec681f3Smrg struct lp_build_gs_iface base; 43701e04c3fSmrg struct tgsi_shader_info *info; 43801e04c3fSmrg 43901e04c3fSmrg BuilderSWR *pBuilder; 44001e04c3fSmrg 44101e04c3fSmrg Value *pGsCtx; 44201e04c3fSmrg SWR_GS_STATE *pGsState; 44301e04c3fSmrg uint32_t num_outputs; 44401e04c3fSmrg uint32_t num_verts_per_prim; 44501e04c3fSmrg 44601e04c3fSmrg Value *pVtxAttribMap; 44701e04c3fSmrg}; 44801e04c3fSmrg 4497ec681f3Smrgstruct swr_tcs_llvm_iface { 4507ec681f3Smrg struct lp_build_tcs_iface base; 4517ec681f3Smrg struct tgsi_shader_info *info; 4527ec681f3Smrg 4537ec681f3Smrg BuilderSWR *pBuilder; 4547ec681f3Smrg 4557ec681f3Smrg Value *pTcsCtx; 4567ec681f3Smrg SWR_TS_STATE *pTsState; 4577ec681f3Smrg 4587ec681f3Smrg uint32_t output_vertices; 4597ec681f3Smrg 4607ec681f3Smrg LLVMValueRef loop_var; 4617ec681f3Smrg 4627ec681f3Smrg Value *pVtxAttribMap; 4637ec681f3Smrg Value *pVtxOutputAttribMap; 4647ec681f3Smrg Value *pPatchOutputAttribMap; 4657ec681f3Smrg}; 4667ec681f3Smrg 4677ec681f3Smrgstruct swr_tes_llvm_iface { 4687ec681f3Smrg struct lp_build_tes_iface base; 4697ec681f3Smrg struct tgsi_shader_info *info; 4707ec681f3Smrg 4717ec681f3Smrg BuilderSWR *pBuilder; 4727ec681f3Smrg 4737ec681f3Smrg Value *pTesCtx; 4747ec681f3Smrg SWR_TS_STATE *pTsState; 4757ec681f3Smrg 4767ec681f3Smrg uint32_t num_outputs; 4777ec681f3Smrg 4787ec681f3Smrg Value *pVtxAttribMap; 4797ec681f3Smrg Value *pPatchAttribMap; 4807ec681f3Smrg}; 4817ec681f3Smrg 48201e04c3fSmrg// trampoline functions so we can use the builder llvm construction methods 48301e04c3fSmrgstatic LLVMValueRef 4847ec681f3Smrgswr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface, 4857ec681f3Smrg struct lp_build_context * bld, 48601e04c3fSmrg boolean is_vindex_indirect, 48701e04c3fSmrg LLVMValueRef vertex_index, 48801e04c3fSmrg boolean is_aindex_indirect, 48901e04c3fSmrg LLVMValueRef attrib_index, 49001e04c3fSmrg LLVMValueRef swizzle_index) 49101e04c3fSmrg{ 49201e04c3fSmrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; 49301e04c3fSmrg 4947ec681f3Smrg return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld, 49501e04c3fSmrg is_vindex_indirect, 49601e04c3fSmrg vertex_index, 49701e04c3fSmrg is_aindex_indirect, 49801e04c3fSmrg attrib_index, 49901e04c3fSmrg swizzle_index); 50001e04c3fSmrg} 50101e04c3fSmrg 50201e04c3fSmrgstatic void 5037ec681f3Smrgswr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, 5047ec681f3Smrg struct lp_build_context * bld, 50501e04c3fSmrg LLVMValueRef (*outputs)[4], 5067ec681f3Smrg LLVMValueRef emitted_vertices_vec, 5077ec681f3Smrg LLVMValueRef mask_vec, 5087ec681f3Smrg LLVMValueRef stream_id) 50901e04c3fSmrg{ 51001e04c3fSmrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 51101e04c3fSmrg 5127ec681f3Smrg iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld, 51301e04c3fSmrg outputs, 5147ec681f3Smrg emitted_vertices_vec, 5157ec681f3Smrg stream_id); 51601e04c3fSmrg} 51701e04c3fSmrg 51801e04c3fSmrgstatic void 5197ec681f3Smrgswr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, 5207ec681f3Smrg struct lp_build_context * bld, 5217ec681f3Smrg LLVMValueRef total_emitted_vertices_vec_ptr, 52201e04c3fSmrg LLVMValueRef verts_per_prim_vec, 5237ec681f3Smrg LLVMValueRef emitted_prims_vec, 5247ec681f3Smrg LLVMValueRef mask_vec, unsigned stream_id) 52501e04c3fSmrg{ 52601e04c3fSmrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 52701e04c3fSmrg 5287ec681f3Smrg iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld, 5297ec681f3Smrg total_emitted_vertices_vec_ptr, 53001e04c3fSmrg verts_per_prim_vec, 5317ec681f3Smrg emitted_prims_vec, 5327ec681f3Smrg mask_vec); 53301e04c3fSmrg} 53401e04c3fSmrg 53501e04c3fSmrgstatic void 5367ec681f3Smrgswr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base, 53701e04c3fSmrg LLVMValueRef total_emitted_vertices_vec, 5387ec681f3Smrg LLVMValueRef emitted_prims_vec, unsigned stream) 53901e04c3fSmrg{ 54001e04c3fSmrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 54101e04c3fSmrg 5427ec681f3Smrg iface->pBuilder->swr_gs_llvm_epilogue(gs_base, 54301e04c3fSmrg total_emitted_vertices_vec, 5447ec681f3Smrg emitted_prims_vec, stream); 5457ec681f3Smrg} 5467ec681f3Smrg 5477ec681f3Smrgstatic LLVMValueRef 5487ec681f3Smrgswr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface, 5497ec681f3Smrg struct lp_build_context * bld, 5507ec681f3Smrg boolean is_vindex_indirect, 5517ec681f3Smrg LLVMValueRef vertex_index, 5527ec681f3Smrg boolean is_aindex_indirect, 5537ec681f3Smrg LLVMValueRef attrib_index, 5547ec681f3Smrg boolean is_sindex_indirect, 5557ec681f3Smrg LLVMValueRef swizzle_index) 5567ec681f3Smrg{ 5577ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 5587ec681f3Smrg struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 5597ec681f3Smrg 5607ec681f3Smrg return iface->pBuilder->swr_tcs_llvm_fetch_input(tcs_iface, bld_base, 5617ec681f3Smrg is_vindex_indirect, 5627ec681f3Smrg vertex_index, 5637ec681f3Smrg is_aindex_indirect, 5647ec681f3Smrg attrib_index, 5657ec681f3Smrg swizzle_index); 5667ec681f3Smrg} 5677ec681f3Smrg 5687ec681f3Smrgstatic LLVMValueRef 5697ec681f3Smrgswr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface, 5707ec681f3Smrg struct lp_build_context * bld, 5717ec681f3Smrg boolean is_vindex_indirect, 5727ec681f3Smrg LLVMValueRef vertex_index, 5737ec681f3Smrg boolean is_aindex_indirect, 5747ec681f3Smrg LLVMValueRef attrib_index, 5757ec681f3Smrg boolean is_sindex_indirect, 5767ec681f3Smrg LLVMValueRef swizzle_index, 5777ec681f3Smrg uint32_t name) 5787ec681f3Smrg{ 5797ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 5807ec681f3Smrg struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 5817ec681f3Smrg 5827ec681f3Smrg return iface->pBuilder->swr_tcs_llvm_fetch_output(tcs_iface, bld_base, 5837ec681f3Smrg is_vindex_indirect, 5847ec681f3Smrg vertex_index, 5857ec681f3Smrg is_aindex_indirect, 5867ec681f3Smrg attrib_index, 5877ec681f3Smrg swizzle_index, 5887ec681f3Smrg name); 5897ec681f3Smrg} 5907ec681f3Smrg 5917ec681f3Smrg 5927ec681f3Smrgstatic void 5937ec681f3Smrgswr_tcs_llvm_emit_prologue(struct lp_build_context* bld) 5947ec681f3Smrg{ 5957ec681f3Smrg lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld; 5967ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface; 5977ec681f3Smrg iface->pBuilder->swr_tcs_llvm_emit_prologue(bld_base); 5987ec681f3Smrg} 5997ec681f3Smrg 6007ec681f3Smrgstatic void 6017ec681f3Smrgswr_tcs_llvm_emit_epilogue(struct lp_build_context* bld) 6027ec681f3Smrg{ 6037ec681f3Smrg lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld; 6047ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface; 6057ec681f3Smrg iface->pBuilder->swr_tcs_llvm_emit_epilogue(bld_base); 6067ec681f3Smrg} 6077ec681f3Smrg 6087ec681f3Smrgstatic 6097ec681f3Smrgvoid swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface, 6107ec681f3Smrg struct lp_build_context * bld, 6117ec681f3Smrg unsigned name, 6127ec681f3Smrg boolean is_vindex_indirect, 6137ec681f3Smrg LLVMValueRef vertex_index, 6147ec681f3Smrg boolean is_aindex_indirect, 6157ec681f3Smrg LLVMValueRef attrib_index, 6167ec681f3Smrg boolean is_sindex_indirect, 6177ec681f3Smrg LLVMValueRef swizzle_index, 6187ec681f3Smrg LLVMValueRef value, 6197ec681f3Smrg LLVMValueRef mask_vec) 6207ec681f3Smrg{ 6217ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 6227ec681f3Smrg struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 6237ec681f3Smrg 6247ec681f3Smrg iface->pBuilder->swr_tcs_llvm_store_output(tcs_iface, 6257ec681f3Smrg bld_base, 6267ec681f3Smrg name, 6277ec681f3Smrg is_vindex_indirect, 6287ec681f3Smrg vertex_index, 6297ec681f3Smrg is_aindex_indirect, 6307ec681f3Smrg attrib_index, 6317ec681f3Smrg swizzle_index, 6327ec681f3Smrg value, 6337ec681f3Smrg mask_vec); 6347ec681f3Smrg} 6357ec681f3Smrg 6367ec681f3Smrg 6377ec681f3Smrgstatic 6387ec681f3Smrgvoid swr_tcs_llvm_emit_barrier(struct lp_build_context *bld) 6397ec681f3Smrg{ 6407ec681f3Smrg lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld; 6417ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface; 6427ec681f3Smrg 6437ec681f3Smrg iface->pBuilder->swr_tcs_llvm_emit_barrier(bld_base->tcs_iface, &bld_base->bld_base); 6447ec681f3Smrg} 6457ec681f3Smrg 6467ec681f3Smrg 6477ec681f3Smrgstatic LLVMValueRef 6487ec681f3Smrgswr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface, 6497ec681f3Smrg struct lp_build_context * bld, 6507ec681f3Smrg boolean is_vindex_indirect, 6517ec681f3Smrg LLVMValueRef vertex_index, 6527ec681f3Smrg boolean is_aindex_indirect, 6537ec681f3Smrg LLVMValueRef attrib_index, 6547ec681f3Smrg boolean is_sindex_indirect, 6557ec681f3Smrg LLVMValueRef swizzle_index) 6567ec681f3Smrg{ 6577ec681f3Smrg swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 6587ec681f3Smrg struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 6597ec681f3Smrg 6607ec681f3Smrg return iface->pBuilder->swr_tes_llvm_fetch_vtx_input(tes_iface, bld_base, 6617ec681f3Smrg is_vindex_indirect, 6627ec681f3Smrg vertex_index, 6637ec681f3Smrg is_aindex_indirect, 6647ec681f3Smrg attrib_index, 6657ec681f3Smrg swizzle_index); 6667ec681f3Smrg} 6677ec681f3Smrg 6687ec681f3Smrgstatic LLVMValueRef 6697ec681f3Smrgswr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface, 6707ec681f3Smrg struct lp_build_context * bld, 6717ec681f3Smrg boolean is_aindex_indirect, 6727ec681f3Smrg LLVMValueRef attrib_index, 6737ec681f3Smrg LLVMValueRef swizzle_index) 6747ec681f3Smrg{ 6757ec681f3Smrg swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 6767ec681f3Smrg struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld; 6777ec681f3Smrg 6787ec681f3Smrg return iface->pBuilder->swr_tes_llvm_fetch_patch_input(tes_iface, bld_base, 6797ec681f3Smrg is_aindex_indirect, 6807ec681f3Smrg attrib_index, 6817ec681f3Smrg swizzle_index); 68201e04c3fSmrg} 68301e04c3fSmrg 68401e04c3fSmrgLLVMValueRef 6857ec681f3SmrgBuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface, 6867ec681f3Smrg struct lp_build_context * bld, 68701e04c3fSmrg boolean is_vindex_indirect, 68801e04c3fSmrg LLVMValueRef vertex_index, 68901e04c3fSmrg boolean is_aindex_indirect, 69001e04c3fSmrg LLVMValueRef attrib_index, 69101e04c3fSmrg LLVMValueRef swizzle_index) 69201e04c3fSmrg{ 69301e04c3fSmrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; 69401e04c3fSmrg Value *vert_index = unwrap(vertex_index); 69501e04c3fSmrg Value *attr_index = unwrap(attrib_index); 69601e04c3fSmrg 69701e04c3fSmrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 69801e04c3fSmrg 69901e04c3fSmrg if (is_vindex_indirect || is_aindex_indirect) { 70001e04c3fSmrg int i; 7017ec681f3Smrg Value *res = unwrap(bld->zero); 7027ec681f3Smrg struct lp_type type = bld->type; 70301e04c3fSmrg 70401e04c3fSmrg for (i = 0; i < type.length; i++) { 70501e04c3fSmrg Value *vert_chan_index = vert_index; 70601e04c3fSmrg Value *attr_chan_index = attr_index; 70701e04c3fSmrg 70801e04c3fSmrg if (is_vindex_indirect) { 70901e04c3fSmrg vert_chan_index = VEXTRACT(vert_index, C(i)); 71001e04c3fSmrg } 71101e04c3fSmrg if (is_aindex_indirect) { 71201e04c3fSmrg attr_chan_index = VEXTRACT(attr_index, C(i)); 71301e04c3fSmrg } 71401e04c3fSmrg 71501e04c3fSmrg Value *attrib = 71601e04c3fSmrg LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index})); 71701e04c3fSmrg 71801e04c3fSmrg Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts}); 71901e04c3fSmrg Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride}); 72001e04c3fSmrg 72101e04c3fSmrg Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib); 72201e04c3fSmrg Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)})); 72301e04c3fSmrg 72401e04c3fSmrg Value *value = VEXTRACT(pInput, C(i)); 72501e04c3fSmrg res = VINSERT(res, value, C(i)); 72601e04c3fSmrg } 72701e04c3fSmrg 72801e04c3fSmrg return wrap(res); 72901e04c3fSmrg } else { 73001e04c3fSmrg Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index})); 73101e04c3fSmrg 73201e04c3fSmrg Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts}); 73301e04c3fSmrg Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride}); 73401e04c3fSmrg 73501e04c3fSmrg Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib); 73601e04c3fSmrg 73701e04c3fSmrg Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)})); 73801e04c3fSmrg 73901e04c3fSmrg return wrap(pInput); 74001e04c3fSmrg } 74101e04c3fSmrg} 74201e04c3fSmrg 74301e04c3fSmrg// GS output stream layout 74401e04c3fSmrg#define VERTEX_COUNT_SIZE 32 74501e04c3fSmrg#define CONTROL_HEADER_SIZE (8*32) 74601e04c3fSmrg 74701e04c3fSmrgvoid 7487ec681f3SmrgBuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, 7497ec681f3Smrg struct lp_build_context * bld, 75001e04c3fSmrg LLVMValueRef (*outputs)[4], 7517ec681f3Smrg LLVMValueRef emitted_vertices_vec, 7527ec681f3Smrg LLVMValueRef stream_id) 75301e04c3fSmrg{ 75401e04c3fSmrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 75501e04c3fSmrg 75601e04c3fSmrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 75701e04c3fSmrg const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE; 75801e04c3fSmrg const uint32_t attribSize = 4 * sizeof(float); 75901e04c3fSmrg const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS; 76001e04c3fSmrg Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize)); 76101e04c3fSmrg 7627ec681f3Smrg Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask}); 7637ec681f3Smrg Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, mVWidth)); 7647ec681f3Smrg 7657ec681f3Smrg Value *pStack = STACKSAVE(); 7667ec681f3Smrg Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking 7677ec681f3Smrg 7687ec681f3Smrg for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) { 7697ec681f3Smrg uint32_t attribSlot = attrib; 7707ec681f3Smrg uint32_t sgvChannel = 0; 7717ec681f3Smrg if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { 7727ec681f3Smrg attribSlot = VERTEX_SGV_SLOT; 7737ec681f3Smrg sgvChannel = VERTEX_SGV_POINT_SIZE_COMP; 7747ec681f3Smrg } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) { 7757ec681f3Smrg attribSlot = VERTEX_SGV_SLOT; 7767ec681f3Smrg sgvChannel = VERTEX_SGV_RTAI_COMP; 7777ec681f3Smrg } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) { 7787ec681f3Smrg attribSlot = VERTEX_SGV_SLOT; 7797ec681f3Smrg sgvChannel = VERTEX_SGV_VAI_COMP; 7807ec681f3Smrg } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { 7817ec681f3Smrg attribSlot = VERTEX_POSITION_SLOT; 7827ec681f3Smrg } else { 7837ec681f3Smrg attribSlot = VERTEX_ATTRIB_START_SLOT + attrib; 7847ec681f3Smrg if (iface->info->writes_position) { 7857ec681f3Smrg attribSlot--; 7867ec681f3Smrg } 7877ec681f3Smrg } 7887ec681f3Smrg 7897ec681f3Smrg Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ? 7907ec681f3Smrg 7917ec681f3Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) { 7927ec681f3Smrg Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane)); 7937ec681f3Smrg Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 7947ec681f3Smrg Value *pStreamOffset = GEP(pStream, pLaneOffset); 7957ec681f3Smrg pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy); 7967ec681f3Smrg 7977ec681f3Smrg Value *pLaneMask = VEXTRACT(vMask1, C(lane)); 7987ec681f3Smrg pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr); 7997ec681f3Smrg 8007ec681f3Smrg for (uint32_t channel = 0; channel < 4; ++channel) { 8017ec681f3Smrg Value *vData; 8027ec681f3Smrg 8037ec681f3Smrg if (attribSlot == VERTEX_SGV_SLOT) 8047ec681f3Smrg vData = LOAD(unwrap(outputs[attrib][0])); 8057ec681f3Smrg else 8067ec681f3Smrg vData = LOAD(unwrap(outputs[attrib][channel])); 8077ec681f3Smrg 8087ec681f3Smrg if (attribSlot != VERTEX_SGV_SLOT || 8097ec681f3Smrg sgvChannel == channel) { 8107ec681f3Smrg vData = VEXTRACT(vData, C(lane)); 8117ec681f3Smrg STORE(vData, pStreamOffset); 8127ec681f3Smrg } 8137ec681f3Smrg pStreamOffset = GEP(pStreamOffset, C(1)); 8147ec681f3Smrg } 8157ec681f3Smrg } 8167ec681f3Smrg } 8177ec681f3Smrg 8187ec681f3Smrg /* When the output type is not points, the geometry shader may not 8197ec681f3Smrg * output data to multiple streams. So early exit here. 8207ec681f3Smrg */ 8217ec681f3Smrg if(iface->pGsState->outputTopology != TOP_POINT_LIST) { 8227ec681f3Smrg STACKRESTORE(pStack); 8237ec681f3Smrg return; 8247ec681f3Smrg } 8257ec681f3Smrg 8267ec681f3Smrg // Info about stream id for each vertex 8277ec681f3Smrg // is coded in 2 bits (4 vert per byte "box"): 8287ec681f3Smrg // ----------------- ----------------- ---- 8297ec681f3Smrg // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |... 8307ec681f3Smrg // ----------------- ----------------- ---- 8317ec681f3Smrg 8327ec681f3Smrg // Calculate where need to put stream id for current vert 8337ec681f3Smrg // in 1 byte "box". 8347ec681f3Smrg Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2)); 8357ec681f3Smrg 8367ec681f3Smrg // Calculate in which box put stream id for current vert. 8377ec681f3Smrg Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2)); 8387ec681f3Smrg 8397ec681f3Smrg // Skip count header 8407ec681f3Smrg Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE)); 8417ec681f3Smrg 8427ec681f3Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) { 8437ec681f3Smrg Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty); 8447ec681f3Smrg Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 8457ec681f3Smrg 8467ec681f3Smrg Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane))); 8477ec681f3Smrg 8487ec681f3Smrg // Just make sure that not overflow max - stream id = (0,1,2,3) 8497ec681f3Smrg Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty); 8507ec681f3Smrg 8517ec681f3Smrg // Shift it to correct position in byte "box" 8527ec681f3Smrg vVal = SHL(vVal, pShift); 8537ec681f3Smrg 8547ec681f3Smrg // Info about other vertices can be already stored 8557ec681f3Smrg // so we need to read and add bits from current vert info. 8567ec681f3Smrg Value *storedValue = LOAD(pStreamOffset); 8577ec681f3Smrg vVal = OR(storedValue, vVal); 8587ec681f3Smrg STORE(vVal, pStreamOffset); 8597ec681f3Smrg } 8607ec681f3Smrg 8617ec681f3Smrg STACKRESTORE(pStack); 8627ec681f3Smrg} 8637ec681f3Smrg 8647ec681f3Smrgvoid 8657ec681f3SmrgBuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, 8667ec681f3Smrg struct lp_build_context * bld, 8677ec681f3Smrg LLVMValueRef total_emitted_vertices_vec, 8687ec681f3Smrg LLVMValueRef verts_per_prim_vec, 8697ec681f3Smrg LLVMValueRef emitted_prims_vec, 8707ec681f3Smrg LLVMValueRef mask_vec) 8717ec681f3Smrg{ 8727ec681f3Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 8737ec681f3Smrg 8747ec681f3Smrg /* When the output type is points, the geometry shader may output data 8757ec681f3Smrg * to multiple streams, and end_primitive has no effect. Info about 8767ec681f3Smrg * stream id for vertices is stored into the same place in memory where 8777ec681f3Smrg * end primitive info is stored so early exit in this case. 8787ec681f3Smrg */ 8797ec681f3Smrg if (iface->pGsState->outputTopology == TOP_POINT_LIST) { 8807ec681f3Smrg return; 8817ec681f3Smrg } 8827ec681f3Smrg 8837ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 8847ec681f3Smrg 8857ec681f3Smrg Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask }); 8867ec681f3Smrg Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, 8)); 8877ec681f3Smrg 8887ec681f3Smrg uint32_t vertsPerPrim = iface->num_verts_per_prim; 8897ec681f3Smrg 8907ec681f3Smrg Value *vCount = 8917ec681f3Smrg ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)), 8927ec681f3Smrg unwrap(verts_per_prim_vec)); 8937ec681f3Smrg 8947ec681f3Smrg vCount = unwrap(total_emitted_vertices_vec); 8957ec681f3Smrg 8967ec681f3Smrg Value *mask = unwrap(mask_vec); 8977ec681f3Smrg Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0))); 8987ec681f3Smrg mask = AND(mask, cmpMask); 8997ec681f3Smrg vMask1 = TRUNC(mask, getVectorType(mInt1Ty, 8)); 9007ec681f3Smrg 9017ec681f3Smrg vCount = SUB(vCount, VIMMED1(1)); 9027ec681f3Smrg Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE)); 9037ec681f3Smrg Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8))); 9047ec681f3Smrg 9057ec681f3Smrg vValue = TRUNC(vValue, getVectorType(mInt8Ty, 8)); 9067ec681f3Smrg 9077ec681f3Smrg Value *pStack = STACKSAVE(); 9087ec681f3Smrg Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking 9097ec681f3Smrg 9107ec681f3Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) { 9117ec681f3Smrg Value *vLaneOffset = VEXTRACT(vOffset, C(lane)); 9127ec681f3Smrg Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 9137ec681f3Smrg Value *pStreamOffset = GEP(pStream, vLaneOffset); 9147ec681f3Smrg 9157ec681f3Smrg Value *pLaneMask = VEXTRACT(vMask1, C(lane)); 9167ec681f3Smrg pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr); 9177ec681f3Smrg 9187ec681f3Smrg Value *vVal = LOAD(pStreamOffset); 9197ec681f3Smrg vVal = OR(vVal, VEXTRACT(vValue, C(lane))); 9207ec681f3Smrg STORE(vVal, pStreamOffset); 9217ec681f3Smrg } 9227ec681f3Smrg 9237ec681f3Smrg STACKRESTORE(pStack); 9247ec681f3Smrg} 9257ec681f3Smrg 9267ec681f3Smrgvoid 9277ec681f3SmrgBuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base, 9287ec681f3Smrg LLVMValueRef total_emitted_vertices_vec, 9297ec681f3Smrg LLVMValueRef emitted_prims_vec, unsigned stream) 9307ec681f3Smrg{ 9317ec681f3Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 9327ec681f3Smrg 9337ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 9347ec681f3Smrg 9357ec681f3Smrg // Store emit count to each output stream in the first DWORD 9367ec681f3Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) 9377ec681f3Smrg { 9387ec681f3Smrg Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 9397ec681f3Smrg pStream = BITCAST(pStream, mInt32PtrTy); 9407ec681f3Smrg Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane)); 9417ec681f3Smrg STORE(pLaneCount, pStream); 9427ec681f3Smrg } 9437ec681f3Smrg} 9447ec681f3Smrg 9457ec681f3Smrgvoid 9467ec681f3SmrgBuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld) 9477ec681f3Smrg{ 9487ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface; 9497ec681f3Smrg 9507ec681f3Smrg Value* loop_var = ALLOCA(mSimdInt32Ty); 9517ec681f3Smrg STORE(VBROADCAST(C(0)), loop_var); 9527ec681f3Smrg 9537ec681f3Smrg iface->loop_var = wrap(loop_var); 9547ec681f3Smrg 9557ec681f3Smrg lp_exec_bgnloop(&bld->exec_mask, true); 9567ec681f3Smrg 9577ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 9587ec681f3Smrg bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var)))); 9597ec681f3Smrg 9607ec681f3Smrg if (verbose_tcs_shader_loop) { 9617ec681f3Smrg lp_build_print_value(gallivm, "Prologue LOOP Iteration BEGIN:", bld->system_values.invocation_id); 9627ec681f3Smrg } 9637ec681f3Smrg 9647ec681f3Smrg} 9657ec681f3Smrg 9667ec681f3Smrgvoid 9677ec681f3SmrgBuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld) 9687ec681f3Smrg{ 9697ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface; 9707ec681f3Smrg 9717ec681f3Smrg struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 9727ec681f3Smrg 9737ec681f3Smrg STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var)); 9747ec681f3Smrg if (verbose_tcs_shader_loop) { 9757ec681f3Smrg lp_build_print_value(gallivm, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface->loop_var)))); 9767ec681f3Smrg } 9777ec681f3Smrg 9787ec681f3Smrg LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))), 9797ec681f3Smrg wrap(VBROADCAST(C(iface->output_vertices)))); 9807ec681f3Smrg lp_exec_mask_cond_push(&bld->exec_mask, tmp); 9817ec681f3Smrg lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false); 9827ec681f3Smrg lp_exec_mask_cond_pop(&bld->exec_mask); 9837ec681f3Smrg lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask); 9847ec681f3Smrg} 9857ec681f3Smrg 9867ec681f3SmrgLLVMValueRef 9877ec681f3SmrgBuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface, 9887ec681f3Smrg struct lp_build_tgsi_context * bld_base, 9897ec681f3Smrg boolean is_vindex_indirect, 9907ec681f3Smrg LLVMValueRef vertex_index, 9917ec681f3Smrg boolean is_aindex_indirect, 9927ec681f3Smrg LLVMValueRef attrib_index, 9937ec681f3Smrg LLVMValueRef swizzle_index) 9947ec681f3Smrg{ 9957ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 9967ec681f3Smrg 9977ec681f3Smrg Value *vert_index = unwrap(vertex_index); 9987ec681f3Smrg Value *attr_index = unwrap(attrib_index); 9997ec681f3Smrg 10007ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 10017ec681f3Smrg 10027ec681f3Smrg if (verbose_tcs_shader_in) { 10037ec681f3Smrg lp_build_printf(gallivm, "[TCS IN][VTX] ======================================\n"); 10047ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] vertex_index: ", vertex_index); 10057ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index); 10067ec681f3Smrg lp_build_printf(gallivm, "[TCS IN][VTX] --------------------------------------\n"); 10077ec681f3Smrg } 10087ec681f3Smrg 10097ec681f3Smrg Value *res = unwrap(bld_base->base.zero); 10107ec681f3Smrg if (is_vindex_indirect || is_aindex_indirect) { 10117ec681f3Smrg int i; 10127ec681f3Smrg struct lp_type type = bld_base->base.type; 10137ec681f3Smrg 10147ec681f3Smrg for (i = 0; i < type.length; i++) { 10157ec681f3Smrg Value *vert_chan_index = vert_index; 10167ec681f3Smrg Value *attr_chan_index = attr_index; 10177ec681f3Smrg 10187ec681f3Smrg if (is_vindex_indirect) { 10197ec681f3Smrg vert_chan_index = VEXTRACT(vert_index, C(i)); 10207ec681f3Smrg } 10217ec681f3Smrg if (is_aindex_indirect) { 10227ec681f3Smrg attr_chan_index = VEXTRACT(attr_index, C(i)); 10237ec681f3Smrg } 10247ec681f3Smrg 10257ec681f3Smrg Value *attrib = 10267ec681f3Smrg LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index})); 10277ec681f3Smrg 10287ec681f3Smrg Value *pBase = GEP(iface->pTcsCtx, 10297ec681f3Smrg { C(0), C(SWR_HS_CONTEXT_vert), vert_chan_index, 10307ec681f3Smrg C(simdvertex_attrib), attrib, unwrap(swizzle_index), C(i) }); 10317ec681f3Smrg 10327ec681f3Smrg Value *val = LOAD(pBase); 10337ec681f3Smrg 10347ec681f3Smrg if (verbose_tcs_shader_in) { 10357ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index)); 10367ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index); 10377ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index)); 10387ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib)); 10397ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index); 10407ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(val)); 10417ec681f3Smrg } 10427ec681f3Smrg res = VINSERT(res, val, C(i)); 10437ec681f3Smrg } 10447ec681f3Smrg } else { 10457ec681f3Smrg Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index})); 10467ec681f3Smrg 10477ec681f3Smrg Value *pBase = GEP(iface->pTcsCtx, 10487ec681f3Smrg { C(0), C(SWR_HS_CONTEXT_vert), vert_index, 10497ec681f3Smrg C(simdvertex_attrib), attrib, unwrap(swizzle_index) }); 10507ec681f3Smrg 10517ec681f3Smrg res = LOAD(pBase); 10527ec681f3Smrg 10537ec681f3Smrg if (verbose_tcs_shader_in) { 10547ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index); 10557ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index)); 10567ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib)); 10577ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index); 10587ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(res)); 10597ec681f3Smrg } 10607ec681f3Smrg } 10617ec681f3Smrg if (verbose_tcs_shader_in) { 10627ec681f3Smrg lp_build_print_value(gallivm, "[TCS IN][VTX] returning: ", wrap(res)); 10637ec681f3Smrg } 10647ec681f3Smrg return wrap(res); 10657ec681f3Smrg} 10667ec681f3Smrg 10677ec681f3SmrgLLVMValueRef 10687ec681f3SmrgBuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface, 10697ec681f3Smrg struct lp_build_tgsi_context * bld_base, 10707ec681f3Smrg boolean is_vindex_indirect, 10717ec681f3Smrg LLVMValueRef vertex_index, 10727ec681f3Smrg boolean is_aindex_indirect, 10737ec681f3Smrg LLVMValueRef attrib_index, 10747ec681f3Smrg LLVMValueRef swizzle_index, 10757ec681f3Smrg uint32_t name) 10767ec681f3Smrg{ 10777ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 10787ec681f3Smrg 10797ec681f3Smrg Value *vert_index = unwrap(vertex_index); 10807ec681f3Smrg Value *attr_index = unwrap(attrib_index); 10817ec681f3Smrg 10827ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 10837ec681f3Smrg 10847ec681f3Smrg if (verbose_tcs_shader_in) { 10857ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT] Vertex index: ", vertex_index); 10867ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT] Attrib index: ", wrap(attr_index)); 10877ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT] Swizzle index: ", swizzle_index); 10887ec681f3Smrg } 10897ec681f3Smrg 10907ec681f3Smrg Value* res = unwrap(bld_base->base.zero); 10917ec681f3Smrg 10927ec681f3Smrg for (uint32_t lane = 0; lane < mVWidth; lane++) { 10937ec681f3Smrg Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout}); 10947ec681f3Smrg Value* pCpOut = GEP(p1, {lane}); 10957ec681f3Smrg 10967ec681f3Smrg Value *vert_chan_index = vert_index; 10977ec681f3Smrg Value *attr_chan_index = attr_index; 10987ec681f3Smrg 10997ec681f3Smrg if (is_vindex_indirect) { 11007ec681f3Smrg vert_chan_index = VEXTRACT(vert_index, C(lane)); 11017ec681f3Smrg if (verbose_tcs_shader_in) { 11027ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index)); 11037ec681f3Smrg } 11047ec681f3Smrg } 11057ec681f3Smrg 11067ec681f3Smrg if (is_aindex_indirect) { 11077ec681f3Smrg attr_chan_index = VEXTRACT(attr_index, C(lane)); 11087ec681f3Smrg if (verbose_tcs_shader_in) { 11097ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index)); 11107ec681f3Smrg } 11117ec681f3Smrg } 11127ec681f3Smrg 11137ec681f3Smrg if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) { 11147ec681f3Smrg Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors}); 11157ec681f3Smrg Value* tessFactorArray = nullptr; 11167ec681f3Smrg if (name == TGSI_SEMANTIC_TESSOUTER) { 11177ec681f3Smrg tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors}); 11187ec681f3Smrg } else { 11197ec681f3Smrg tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors}); 11207ec681f3Smrg } 11217ec681f3Smrg Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)}); 11227ec681f3Smrg res = VINSERT(res, LOAD(tessFactor), C(lane)); 11237ec681f3Smrg if (verbose_tcs_shader_in) { 11247ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane))); 11257ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] loaded value: ", wrap(res)); 11267ec681f3Smrg } 11277ec681f3Smrg } else if (name == TGSI_SEMANTIC_PATCH) { 11287ec681f3Smrg Value* attr_index_from_map = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_chan_index})); 11297ec681f3Smrg Value* attr_value = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attr_index_from_map, unwrap(swizzle_index)}); 11307ec681f3Smrg res = VINSERT(res, LOAD(attr_value), C(lane)); 11317ec681f3Smrg if (verbose_tcs_shader_in) { 11327ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT][PATCH] attr index loaded from map: ", wrap(attr_index_from_map)); 11337ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane))); 11347ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT][PATCH] loaded value: ", wrap(res)); 11357ec681f3Smrg } 11367ec681f3Smrg } else { 11377ec681f3Smrg // Generic attribute 11387ec681f3Smrg Value *attrib = 11397ec681f3Smrg LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_chan_index})); 11407ec681f3Smrg if (verbose_tcs_shader_in) { 11417ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib)); 11427ec681f3Smrg } 11437ec681f3Smrg Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_chan_index, 11447ec681f3Smrg C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)}); 11457ec681f3Smrg 11467ec681f3Smrg res = VINSERT(res, LOAD(attr_chan), C(lane)); 11477ec681f3Smrg if (verbose_tcs_shader_in) { 11487ec681f3Smrg lp_build_print_value(gallivm, "[TCS INOUT][VTX] loaded value: ", wrap(res)); 11497ec681f3Smrg } 11507ec681f3Smrg } 11517ec681f3Smrg } 11527ec681f3Smrg 11537ec681f3Smrg return wrap(res); 11547ec681f3Smrg} 11557ec681f3Smrg 11567ec681f3Smrgvoid 11577ec681f3SmrgBuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface, 11587ec681f3Smrg struct lp_build_tgsi_context *bld_base, 11597ec681f3Smrg unsigned name, 11607ec681f3Smrg boolean is_vindex_indirect, 11617ec681f3Smrg LLVMValueRef vertex_index, 11627ec681f3Smrg boolean is_aindex_indirect, 11637ec681f3Smrg LLVMValueRef attrib_index, 11647ec681f3Smrg LLVMValueRef swizzle_index, 11657ec681f3Smrg LLVMValueRef value, 11667ec681f3Smrg LLVMValueRef mask_vec) 11677ec681f3Smrg{ 11687ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 11697ec681f3Smrg struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base; 11707ec681f3Smrg 11717ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 11727ec681f3Smrg 11737ec681f3Smrg if (verbose_tcs_shader_out) { 11747ec681f3Smrg lp_build_printf(gallivm, "[TCS OUT] =============================================\n"); 11757ec681f3Smrg } 11767ec681f3Smrg 11777ec681f3Smrg if (verbose_tcs_shader_out) { 11787ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Store mask: ", bld->exec_mask.exec_mask); 11797ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Store value: ", value); 11807ec681f3Smrg } 11817ec681f3Smrg 11827ec681f3Smrg Value *vert_index = unwrap(vertex_index); 11837ec681f3Smrg Value *attr_index = unwrap(attrib_index); 11847ec681f3Smrg 11857ec681f3Smrg if (verbose_tcs_shader_out) { 11867ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Vertex index: ", vertex_index); 11877ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Attrib index: ", wrap(attr_index)); 11887ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Swizzle index: ", swizzle_index); 11897ec681f3Smrg } 11907ec681f3Smrg 11917ec681f3Smrg if (is_vindex_indirect) { 11927ec681f3Smrg vert_index = VEXTRACT(vert_index, C(0)); 11937ec681f3Smrg if (verbose_tcs_shader_out) { 11947ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Extracted vertex index: ", vertex_index); 11957ec681f3Smrg } 11967ec681f3Smrg } 11977ec681f3Smrg 11987ec681f3Smrg if (is_aindex_indirect) { 11997ec681f3Smrg attr_index = VEXTRACT(attr_index, C(0)); 12007ec681f3Smrg if (verbose_tcs_shader_out) { 12017ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Extracted attrib index: ", wrap(attr_index)); 12027ec681f3Smrg } 12037ec681f3Smrg } 12047ec681f3Smrg 12057ec681f3Smrg if (verbose_tcs_shader_out) { 12067ec681f3Smrg if (bld->exec_mask.has_mask) { 12077ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT] Exec mask: ", bld->exec_mask.exec_mask); 12087ec681f3Smrg } 12097ec681f3Smrg else { 12107ec681f3Smrg lp_build_printf(gallivm, "[TCS OUT] has no mask\n"); 12117ec681f3Smrg } 12127ec681f3Smrg } 12137ec681f3Smrg for (uint32_t lane = 0; lane < mVWidth; lane++) { 12147ec681f3Smrg Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout}); 12157ec681f3Smrg Value* pCpOut = GEP(p1, {lane}); 12167ec681f3Smrg 12177ec681f3Smrg if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) { 12187ec681f3Smrg Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors}); 12197ec681f3Smrg Value* tessFactorArray = nullptr; 12207ec681f3Smrg if (name == TGSI_SEMANTIC_TESSOUTER) { 12217ec681f3Smrg tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors}); 12227ec681f3Smrg } else { 12237ec681f3Smrg tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors}); 12247ec681f3Smrg } 12257ec681f3Smrg Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)}); 12267ec681f3Smrg Value* valueToStore = VEXTRACT(unwrap(value), C(lane)); 12277ec681f3Smrg valueToStore = BITCAST(valueToStore, mFP32Ty); 12287ec681f3Smrg if (mask_vec) { 12297ec681f3Smrg Value *originalVal = LOAD(tessFactor); 12307ec681f3Smrg Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty); 12317ec681f3Smrg valueToStore = SELECT(vMask, valueToStore, originalVal); 12327ec681f3Smrg } 12337ec681f3Smrg STORE(valueToStore, tessFactor); 12347ec681f3Smrg if (verbose_tcs_shader_out) 12357ec681f3Smrg { 12367ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec); 12377ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore)); 12387ec681f3Smrg } 12397ec681f3Smrg } else if (name == TGSI_SEMANTIC_PATCH) { 12407ec681f3Smrg Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index})); 12417ec681f3Smrg if (verbose_tcs_shader_out) { 12427ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index)); 12437ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index)); 12447ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect))); 12457ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect))); 12467ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib)); 12477ec681f3Smrg } 12487ec681f3Smrg Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib}); 12497ec681f3Smrg Value* value_to_store = VEXTRACT(unwrap(value), C(lane)); 12507ec681f3Smrg if (verbose_tcs_shader_out) { 12517ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane))); 12527ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] value to store: ", value); 12537ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store)); 12547ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] chan_index: ", swizzle_index); 12557ec681f3Smrg } 12567ec681f3Smrg value_to_store = BITCAST(value_to_store, mFP32Ty); 12577ec681f3Smrg if (mask_vec) { 12587ec681f3Smrg Value *originalVal = LOADV(attr, {C(0), unwrap(swizzle_index)}); 12597ec681f3Smrg Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty); 12607ec681f3Smrg value_to_store = SELECT(vMask, value_to_store, originalVal); 12617ec681f3Smrg if (verbose_tcs_shader_out) { 12627ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", mask_vec); 12637ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal)); 12647ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] vMask: ", wrap(vMask)); 12657ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store)); 12667ec681f3Smrg } 12677ec681f3Smrg } 12687ec681f3Smrg STOREV(value_to_store, attr, {C(0), unwrap(swizzle_index)}); 12697ec681f3Smrg if (verbose_tcs_shader_out) { 12707ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store)); 12717ec681f3Smrg } 12727ec681f3Smrg } else { 12737ec681f3Smrg Value* value_to_store = VEXTRACT(unwrap(value), C(lane)); 12747ec681f3Smrg Value* attrib = LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index})); 12757ec681f3Smrg 12767ec681f3Smrg if (verbose_tcs_shader_out) { 12777ec681f3Smrg lp_build_printf(gallivm, "[TCS OUT] Writting attribute\n"); 12787ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] invocation_id: ", bld->system_values.invocation_id); 12797ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index)); 12807ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib)); 12817ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] chan_index: ", swizzle_index); 12827ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] value: ", value); 12837ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store)); 12847ec681f3Smrg } 12857ec681f3Smrg 12867ec681f3Smrg Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), 12877ec681f3Smrg VEXTRACT(unwrap(bld->system_values.invocation_id), C(0)), 12887ec681f3Smrg C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)}); 12897ec681f3Smrg 12907ec681f3Smrg // Mask output values if needed 12917ec681f3Smrg value_to_store = BITCAST(value_to_store, mFP32Ty); 12927ec681f3Smrg if (mask_vec) { 12937ec681f3Smrg Value *originalVal = LOAD(attr_chan); 12947ec681f3Smrg Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty); 12957ec681f3Smrg value_to_store = SELECT(vMask, value_to_store, originalVal); 12967ec681f3Smrg } 12977ec681f3Smrg STORE(value_to_store, attr_chan); 12987ec681f3Smrg if (verbose_tcs_shader_out) { 12997ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec); 13007ec681f3Smrg lp_build_print_value(gallivm, "[TCS OUT][VTX] stored: ", wrap(value_to_store)); 13017ec681f3Smrg } 13027ec681f3Smrg } 13037ec681f3Smrg } 13047ec681f3Smrg} 13057ec681f3Smrg 13067ec681f3Smrgvoid 13077ec681f3SmrgBuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface, 13087ec681f3Smrg struct lp_build_tgsi_context *bld_base) 13097ec681f3Smrg{ 13107ec681f3Smrg swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface; 13117ec681f3Smrg struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base; 13127ec681f3Smrg 13137ec681f3Smrg if (verbose_tcs_shader_loop) { 13147ec681f3Smrg lp_build_print_value(gallivm, "Barrier LOOP: Iteration %d END\n", iface->loop_var); 13157ec681f3Smrg } 13167ec681f3Smrg 13177ec681f3Smrg struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 13187ec681f3Smrg 13197ec681f3Smrg STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var)); 13207ec681f3Smrg 13217ec681f3Smrg LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))), 13227ec681f3Smrg wrap(VBROADCAST(C(iface->output_vertices)))); 13237ec681f3Smrg 13247ec681f3Smrg lp_exec_mask_cond_push(&bld->exec_mask, tmp); 13257ec681f3Smrg lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false); 13267ec681f3Smrg lp_exec_mask_cond_pop(&bld->exec_mask); 13277ec681f3Smrg lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask); 13287ec681f3Smrg 13297ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 13307ec681f3Smrg 13317ec681f3Smrg STORE(VBROADCAST(C(0)), unwrap(iface->loop_var)); 13327ec681f3Smrg lp_exec_bgnloop(&bld->exec_mask, true); 13337ec681f3Smrg 13347ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 13357ec681f3Smrg 13367ec681f3Smrg bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var)))); 13377ec681f3Smrg 13387ec681f3Smrg if (verbose_tcs_shader_loop) { 13397ec681f3Smrg lp_build_print_value(gallivm, "Barrier LOOP: Iteration BEGIN: ", iface->loop_var); 13407ec681f3Smrg lp_build_print_value(gallivm, "Barrier LOOP: InvocationId: \n", bld->system_values.invocation_id); 13417ec681f3Smrg } 13427ec681f3Smrg} 13437ec681f3Smrg 13447ec681f3Smrg 13457ec681f3SmrgLLVMValueRef 13467ec681f3SmrgBuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface, 13477ec681f3Smrg struct lp_build_tgsi_context * bld_base, 13487ec681f3Smrg boolean is_aindex_indirect, 13497ec681f3Smrg LLVMValueRef attrib_index, 13507ec681f3Smrg LLVMValueRef swizzle_index) 13517ec681f3Smrg{ 13527ec681f3Smrg swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 13537ec681f3Smrg Value *attr_index = unwrap(attrib_index); 13547ec681f3Smrg Value *res = unwrap(bld_base->base.zero); 13557ec681f3Smrg 13567ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 13577ec681f3Smrg 13587ec681f3Smrg if (verbose_shader) { 13597ec681f3Smrg lp_build_printf(gallivm, "[TES IN][PATCH] --------------------------------------\n"); 13607ec681f3Smrg } 13617ec681f3Smrg 13627ec681f3Smrg if (is_aindex_indirect) { 13637ec681f3Smrg int i; 13647ec681f3Smrg struct lp_type type = bld_base->base.type; 13657ec681f3Smrg 13667ec681f3Smrg for (i = 0; i < type.length; i++) { 13677ec681f3Smrg Value *attr_chan_index = attr_index; 13687ec681f3Smrg 13697ec681f3Smrg if (is_aindex_indirect) { 13707ec681f3Smrg attr_chan_index = VEXTRACT(attr_index, C(i)); 13717ec681f3Smrg } 13727ec681f3Smrg 13737ec681f3Smrg Value *attrib = 13747ec681f3Smrg LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_chan_index})); 13757ec681f3Smrg 13767ec681f3Smrg Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 13777ec681f3Smrg Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData}); 13787ec681f3Smrg Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib}); 13797ec681f3Smrg Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)}); 13807ec681f3Smrg if (verbose_shader) { 13817ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index); 13827ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index)); 13837ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib)); 13847ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index); 13857ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val)); 13867ec681f3Smrg } 13877ec681f3Smrg res = VINSERT(res, Val, C(i)); 13887ec681f3Smrg } 13897ec681f3Smrg } else { 13907ec681f3Smrg Value *attrib = LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_index})); 13917ec681f3Smrg 13927ec681f3Smrg Value *pCpIn = LOAD(iface->pTesCtx, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 13937ec681f3Smrg Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData}); 13947ec681f3Smrg Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib}); 13957ec681f3Smrg Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)}); 13967ec681f3Smrg if (verbose_shader) { 13977ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index); 13987ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index)); 13997ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib)); 14007ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index); 14017ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val)); 14027ec681f3Smrg } 14037ec681f3Smrg res = VBROADCAST(Val); 14047ec681f3Smrg } 14057ec681f3Smrg if (verbose_shader) { 14067ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][PATCH] returning: ", wrap(res)); 14077ec681f3Smrg } 14087ec681f3Smrg return wrap(res); 14097ec681f3Smrg} 14107ec681f3Smrg 14117ec681f3Smrg 14127ec681f3Smrg 14137ec681f3SmrgLLVMValueRef 14147ec681f3SmrgBuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface, 14157ec681f3Smrg struct lp_build_tgsi_context * bld_base, 14167ec681f3Smrg boolean is_vindex_indirect, 14177ec681f3Smrg LLVMValueRef vertex_index, 14187ec681f3Smrg boolean is_aindex_indirect, 14197ec681f3Smrg LLVMValueRef attrib_index, 14207ec681f3Smrg LLVMValueRef swizzle_index) 14217ec681f3Smrg{ 14227ec681f3Smrg swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface; 14237ec681f3Smrg Value *vert_index = unwrap(vertex_index); 14247ec681f3Smrg Value *attr_index = unwrap(attrib_index); 14257ec681f3Smrg 14267ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 14277ec681f3Smrg 14287ec681f3Smrg if (verbose_shader) { 14297ec681f3Smrg lp_build_printf(gallivm, "[TES IN][VTX] --------------------------------------\n"); 14307ec681f3Smrg } 14317ec681f3Smrg 14327ec681f3Smrg Value *res = unwrap(bld_base->base.zero); 14337ec681f3Smrg if (is_vindex_indirect || is_aindex_indirect) { 14347ec681f3Smrg int i; 14357ec681f3Smrg struct lp_type type = bld_base->base.type; 14367ec681f3Smrg 14377ec681f3Smrg for (i = 0; i < type.length; i++) { 14387ec681f3Smrg Value *vert_chan_index = vert_index; 14397ec681f3Smrg Value *attr_chan_index = attr_index; 14407ec681f3Smrg 14417ec681f3Smrg if (is_vindex_indirect) { 14427ec681f3Smrg vert_chan_index = VEXTRACT(vert_index, C(i)); 14437ec681f3Smrg } 14447ec681f3Smrg if (is_aindex_indirect) { 14457ec681f3Smrg attr_chan_index = VEXTRACT(attr_index, C(i)); 14467ec681f3Smrg } 14477ec681f3Smrg 14487ec681f3Smrg Value *attrib = 14497ec681f3Smrg LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index})); 14507ec681f3Smrg 14517ec681f3Smrg Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 14527ec681f3Smrg Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp}); 14537ec681f3Smrg Value *pVertex = GEP(pCp, {(Value*)C(0), vert_chan_index}); 14547ec681f3Smrg Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)}); 14557ec681f3Smrg Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib}); 14567ec681f3Smrg Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)}); 14577ec681f3Smrg if (verbose_shader) { 14587ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index); 14597ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index)); 14607ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib)); 14617ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index); 14627ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val)); 14637ec681f3Smrg } 14647ec681f3Smrg res = VINSERT(res, Val, C(i)); 14657ec681f3Smrg } 14667ec681f3Smrg } else { 14677ec681f3Smrg Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index})); 14687ec681f3Smrg 14697ec681f3Smrg Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn"); 14707ec681f3Smrg Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp}); 14717ec681f3Smrg Value *pVertex = GEP(pCp, {(Value*)C(0), vert_index}); 14727ec681f3Smrg Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)}); 14737ec681f3Smrg Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib}); 14747ec681f3Smrg Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)}); 14757ec681f3Smrg if (verbose_shader) { 14767ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index); 14777ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index)); 14787ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib)); 14797ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index); 14807ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val)); 14817ec681f3Smrg } 14827ec681f3Smrg res = VBROADCAST(Val); 14837ec681f3Smrg } 14847ec681f3Smrg if (verbose_shader) { 14857ec681f3Smrg lp_build_print_value(gallivm, "[TES IN][VTX] returning: ", wrap(res)); 14867ec681f3Smrg } 14877ec681f3Smrg return wrap(res); 14887ec681f3Smrg} 14897ec681f3Smrg 14907ec681f3Smrg 14917ec681f3Smrg 14927ec681f3Smrg 14937ec681f3SmrgPFN_GS_FUNC 14947ec681f3SmrgBuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) 14957ec681f3Smrg{ 14967ec681f3Smrg SWR_GS_STATE *pGS = &ctx->gs->gsState; 14977ec681f3Smrg struct tgsi_shader_info *info = &ctx->gs->info.base; 14987ec681f3Smrg 14997ec681f3Smrg memset(pGS, 0, sizeof(*pGS)); 15007ec681f3Smrg 15017ec681f3Smrg pGS->gsEnable = true; 15027ec681f3Smrg 15037ec681f3Smrg pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs; 15047ec681f3Smrg pGS->outputTopology = 15057ec681f3Smrg swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM], 0); 15067ec681f3Smrg 15077ec681f3Smrg /* It's +1 because emit_vertex in swr is always called exactly one time more 15087ec681f3Smrg * than max_vertices passed in Geometry Shader. We need to allocate more memory 15097ec681f3Smrg * to avoid crash/memory overwritten. 15107ec681f3Smrg */ 15117ec681f3Smrg pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1; 15127ec681f3Smrg pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS]; 15137ec681f3Smrg 15147ec681f3Smrg // If point primitive then assume to use multiple streams 15157ec681f3Smrg if(pGS->outputTopology == TOP_POINT_LIST) { 15167ec681f3Smrg pGS->isSingleStream = false; 15177ec681f3Smrg } else { 15187ec681f3Smrg pGS->isSingleStream = true; 15197ec681f3Smrg pGS->singleStreamID = 0; 15207ec681f3Smrg } 15217ec681f3Smrg 15227ec681f3Smrg pGS->vertexAttribOffset = VERTEX_POSITION_SLOT; 15237ec681f3Smrg pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset; 15247ec681f3Smrg pGS->outputVertexSize = SWR_VTX_NUM_SLOTS; 15257ec681f3Smrg pGS->controlDataSize = 8; // GS outputs max of 8 32B units 15267ec681f3Smrg pGS->controlDataOffset = VERTEX_COUNT_SIZE; 15277ec681f3Smrg pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE; 15287ec681f3Smrg 15297ec681f3Smrg pGS->allocationSize = 15307ec681f3Smrg VERTEX_COUNT_SIZE + // vertex count 15317ec681f3Smrg CONTROL_HEADER_SIZE + // control header 15327ec681f3Smrg (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex 15337ec681f3Smrg pGS->maxNumVerts; // num verts 15347ec681f3Smrg 15357ec681f3Smrg struct swr_geometry_shader *gs = ctx->gs; 15367ec681f3Smrg 15377ec681f3Smrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 15387ec681f3Smrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 15397ec681f3Smrg 15407ec681f3Smrg memset(outputs, 0, sizeof(outputs)); 15417ec681f3Smrg 15427ec681f3Smrg AttrBuilder attrBuilder; 15437ec681f3Smrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 15447ec681f3Smrg 15457ec681f3Smrg std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 15467ec681f3Smrg PointerType::get(mInt8Ty, 0), 15477ec681f3Smrg PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)}; 15487ec681f3Smrg FunctionType *vsFuncType = 15497ec681f3Smrg FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false); 15507ec681f3Smrg 15517ec681f3Smrg // create new vertex shader function 15527ec681f3Smrg auto pFunction = Function::Create(vsFuncType, 15537ec681f3Smrg GlobalValue::ExternalLinkage, 15547ec681f3Smrg "GS", 15557ec681f3Smrg JM()->mpCurrentModule); 15567ec681f3Smrg#if LLVM_VERSION_MAJOR < 5 15577ec681f3Smrg AttributeSet attrSet = AttributeSet::get( 15587ec681f3Smrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 15597ec681f3Smrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 15607ec681f3Smrg#else 15617ec681f3Smrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 15627ec681f3Smrg#endif 15637ec681f3Smrg 15647ec681f3Smrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 15657ec681f3Smrg IRB()->SetInsertPoint(block); 15667ec681f3Smrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 15677ec681f3Smrg 15687ec681f3Smrg auto argitr = pFunction->arg_begin(); 15697ec681f3Smrg Value *hPrivateData = &*argitr++; 15707ec681f3Smrg hPrivateData->setName("hPrivateData"); 15717ec681f3Smrg Value *pWorkerData = &*argitr++; 15727ec681f3Smrg pWorkerData->setName("pWorkerData"); 15737ec681f3Smrg Value *pGsCtx = &*argitr++; 15747ec681f3Smrg pGsCtx->setName("gsCtx"); 15757ec681f3Smrg 15767ec681f3Smrg Value *consts_ptr = 15777ec681f3Smrg GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)}); 15787ec681f3Smrg consts_ptr->setName("gs_constants"); 15797ec681f3Smrg Value *const_sizes_ptr = 15807ec681f3Smrg GEP(hPrivateData, {0, swr_draw_context_num_constantsGS}); 15817ec681f3Smrg const_sizes_ptr->setName("num_gs_constants"); 15827ec681f3Smrg 15837ec681f3Smrg struct lp_build_sampler_soa *sampler = 15847ec681f3Smrg swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY); 15857ec681f3Smrg assert(sampler != nullptr); 15867ec681f3Smrg 15877ec681f3Smrg struct lp_bld_tgsi_system_values system_values; 15887ec681f3Smrg memset(&system_values, 0, sizeof(system_values)); 15897ec681f3Smrg system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID})); 15907ec681f3Smrg system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID})); 15917ec681f3Smrg 15927ec681f3Smrg std::vector<Constant*> mapConstants; 15937ec681f3Smrg Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 15947ec681f3Smrg for (unsigned slot = 0; slot < info->num_inputs; slot++) { 15957ec681f3Smrg ubyte semantic_name = info->input_semantic_name[slot]; 15967ec681f3Smrg ubyte semantic_idx = info->input_semantic_index[slot]; 15977ec681f3Smrg 15987ec681f3Smrg unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); 15997ec681f3Smrg assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS); 16007ec681f3Smrg 16017ec681f3Smrg vs_slot += VERTEX_ATTRIB_START_SLOT; 16027ec681f3Smrg 16037ec681f3Smrg if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) 16047ec681f3Smrg vs_slot--; 16057ec681f3Smrg 16067ec681f3Smrg if (semantic_name == TGSI_SEMANTIC_POSITION) 16077ec681f3Smrg vs_slot = VERTEX_POSITION_SLOT; 16087ec681f3Smrg 16097ec681f3Smrg STORE(C(vs_slot), vtxAttribMap, {0, slot}); 16107ec681f3Smrg mapConstants.push_back(C(vs_slot)); 16117ec681f3Smrg } 16127ec681f3Smrg 16137ec681f3Smrg struct lp_build_mask_context mask; 16147ec681f3Smrg Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask"); 16157ec681f3Smrg lp_build_mask_begin(&mask, gallivm, 16167ec681f3Smrg lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 16177ec681f3Smrg 16187ec681f3Smrg // zero out cut buffer so we can load/modify/store bits 16197ec681f3Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) 16207ec681f3Smrg { 16217ec681f3Smrg Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 16227ec681f3Smrg#if LLVM_VERSION_MAJOR >= 10 16237ec681f3Smrg MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH)); 16247ec681f3Smrg#else 16257ec681f3Smrg MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH); 16267ec681f3Smrg#endif 16277ec681f3Smrg } 16287ec681f3Smrg 16297ec681f3Smrg struct swr_gs_llvm_iface gs_iface; 16307ec681f3Smrg gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input; 16317ec681f3Smrg gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex; 16327ec681f3Smrg gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive; 16337ec681f3Smrg gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue; 16347ec681f3Smrg gs_iface.pBuilder = this; 16357ec681f3Smrg gs_iface.pGsCtx = pGsCtx; 16367ec681f3Smrg gs_iface.pGsState = pGS; 16377ec681f3Smrg gs_iface.num_outputs = gs->info.base.num_outputs; 16387ec681f3Smrg gs_iface.num_verts_per_prim = 16397ec681f3Smrg u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]); 16407ec681f3Smrg gs_iface.info = info; 16417ec681f3Smrg gs_iface.pVtxAttribMap = vtxAttribMap; 16427ec681f3Smrg 16437ec681f3Smrg struct lp_build_tgsi_params params; 16447ec681f3Smrg memset(¶ms, 0, sizeof(params)); 16457ec681f3Smrg params.type = lp_type_float_vec(32, 32 * 8); 16467ec681f3Smrg params.mask = & mask; 16477ec681f3Smrg params.consts_ptr = wrap(consts_ptr); 16487ec681f3Smrg params.const_sizes_ptr = wrap(const_sizes_ptr); 16497ec681f3Smrg params.system_values = &system_values; 16507ec681f3Smrg params.inputs = inputs; 16517ec681f3Smrg params.context_ptr = wrap(hPrivateData); 16527ec681f3Smrg params.sampler = sampler; 16537ec681f3Smrg params.info = &gs->info.base; 16547ec681f3Smrg params.gs_iface = &gs_iface.base; 16557ec681f3Smrg 16567ec681f3Smrg lp_build_tgsi_soa(gallivm, 16577ec681f3Smrg gs->pipe.tokens, 16587ec681f3Smrg ¶ms, 16597ec681f3Smrg outputs); 16607ec681f3Smrg 16617ec681f3Smrg lp_build_mask_end(&mask); 16627ec681f3Smrg 16637ec681f3Smrg sampler->destroy(sampler); 16647ec681f3Smrg 16657ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 16667ec681f3Smrg 16677ec681f3Smrg RET_VOID(); 16687ec681f3Smrg 16697ec681f3Smrg gallivm_verify_function(gallivm, wrap(pFunction)); 16707ec681f3Smrg gallivm_compile_module(gallivm); 16717ec681f3Smrg 16727ec681f3Smrg PFN_GS_FUNC pFunc = 16737ec681f3Smrg (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 16747ec681f3Smrg 16757ec681f3Smrg debug_printf("geom shader %p\n", pFunc); 16767ec681f3Smrg assert(pFunc && "Error: GeomShader = NULL"); 16777ec681f3Smrg 16787ec681f3Smrg JM()->mIsModuleFinalized = true; 16797ec681f3Smrg 16807ec681f3Smrg return pFunc; 16817ec681f3Smrg} 16827ec681f3Smrg 16837ec681f3SmrgPFN_TES_FUNC 16847ec681f3SmrgBuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key) 16857ec681f3Smrg{ 16867ec681f3Smrg SWR_TS_STATE *pTS = &ctx->tsState; 16877ec681f3Smrg struct tgsi_shader_info *info = &ctx->tes->info.base; 16887ec681f3Smrg 16897ec681f3Smrg // tessellation is enabled if TES is present 16907ec681f3Smrg // clear tessellation state here then 16917ec681f3Smrg memset(pTS, 0, sizeof(*pTS)); 16927ec681f3Smrg 16937ec681f3Smrg pTS->tsEnable = true; 16947ec681f3Smrg 16957ec681f3Smrg unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE]; 16967ec681f3Smrg unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; 16977ec681f3Smrg bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; 16987ec681f3Smrg bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE]; 16997ec681f3Smrg SWR_TS_DOMAIN type = SWR_TS_ISOLINE; 17007ec681f3Smrg SWR_TS_PARTITIONING partitioning = SWR_TS_EVEN_FRACTIONAL; 17017ec681f3Smrg SWR_TS_OUTPUT_TOPOLOGY topology = SWR_TS_OUTPUT_POINT; 17027ec681f3Smrg PRIMITIVE_TOPOLOGY postDSTopology = TOP_POINT_LIST; 17037ec681f3Smrg 17047ec681f3Smrg // TESS_TODO: move this to helper functions to improve readability 17057ec681f3Smrg switch (tes_prim_mode) { 17067ec681f3Smrg case PIPE_PRIM_LINES: 17077ec681f3Smrg type = SWR_TS_ISOLINE; 17087ec681f3Smrg postDSTopology = TOP_LINE_LIST; 17097ec681f3Smrg break; 17107ec681f3Smrg case PIPE_PRIM_TRIANGLES: 17117ec681f3Smrg type = SWR_TS_TRI; 17127ec681f3Smrg postDSTopology = TOP_TRIANGLE_LIST; 17137ec681f3Smrg break; 17147ec681f3Smrg case PIPE_PRIM_QUADS: 17157ec681f3Smrg type = SWR_TS_QUAD; 17167ec681f3Smrg // See OpenGL spec - quads are tessellated into triangles 17177ec681f3Smrg postDSTopology = TOP_TRIANGLE_LIST; 17187ec681f3Smrg break; 17197ec681f3Smrg default: 17207ec681f3Smrg assert(0); 17217ec681f3Smrg } 17227ec681f3Smrg 17237ec681f3Smrg switch (tes_spacing) { 17247ec681f3Smrg case PIPE_TESS_SPACING_FRACTIONAL_ODD: 17257ec681f3Smrg partitioning = SWR_TS_ODD_FRACTIONAL; 17267ec681f3Smrg break; 17277ec681f3Smrg case PIPE_TESS_SPACING_FRACTIONAL_EVEN: 17287ec681f3Smrg partitioning = SWR_TS_EVEN_FRACTIONAL; 17297ec681f3Smrg break; 17307ec681f3Smrg case PIPE_TESS_SPACING_EQUAL: 17317ec681f3Smrg partitioning = SWR_TS_INTEGER; 17327ec681f3Smrg break; 17337ec681f3Smrg default: 17347ec681f3Smrg assert(0); 17357ec681f3Smrg } 17367ec681f3Smrg 17377ec681f3Smrg if (tes_point_mode) { 17387ec681f3Smrg topology = SWR_TS_OUTPUT_POINT; 17397ec681f3Smrg postDSTopology = TOP_POINT_LIST; 17407ec681f3Smrg } 17417ec681f3Smrg else if (tes_prim_mode == PIPE_PRIM_LINES) { 17427ec681f3Smrg topology = SWR_TS_OUTPUT_LINE; 17437ec681f3Smrg } 17447ec681f3Smrg else if (tes_vertex_order_cw) { 17457ec681f3Smrg topology = SWR_TS_OUTPUT_TRI_CW; 17467ec681f3Smrg } 17477ec681f3Smrg else { 17487ec681f3Smrg topology = SWR_TS_OUTPUT_TRI_CCW; 17497ec681f3Smrg } 17507ec681f3Smrg 17517ec681f3Smrg pTS->domain = type; 17527ec681f3Smrg pTS->tsOutputTopology = topology; 17537ec681f3Smrg pTS->partitioning = partitioning; 17547ec681f3Smrg pTS->numDsOutputAttribs = info->num_outputs; 17557ec681f3Smrg pTS->postDSTopology = postDSTopology; 17567ec681f3Smrg 17577ec681f3Smrg pTS->dsAllocationSize = SWR_VTX_NUM_SLOTS * MAX_NUM_VERTS_PER_PRIM; 17587ec681f3Smrg pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 17597ec681f3Smrg pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 17607ec681f3Smrg pTS->dsOutVtxAttribOffset = VERTEX_ATTRIB_START_SLOT; 17617ec681f3Smrg 17627ec681f3Smrg struct swr_tess_evaluation_shader *tes = ctx->tes; 17637ec681f3Smrg 17647ec681f3Smrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 17657ec681f3Smrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 17667ec681f3Smrg 17677ec681f3Smrg memset(outputs, 0, sizeof(outputs)); 17687ec681f3Smrg 17697ec681f3Smrg AttrBuilder attrBuilder; 17707ec681f3Smrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 17717ec681f3Smrg 17727ec681f3Smrg std::vector<Type *> tesArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 17737ec681f3Smrg PointerType::get(mInt8Ty, 0), 17747ec681f3Smrg PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)}; 17757ec681f3Smrg FunctionType *tesFuncType = 17767ec681f3Smrg FunctionType::get(Type::getVoidTy(JM()->mContext), tesArgs, false); 17777ec681f3Smrg 17787ec681f3Smrg // create new vertex shader function 17797ec681f3Smrg auto pFunction = Function::Create(tesFuncType, 17807ec681f3Smrg GlobalValue::ExternalLinkage, 17817ec681f3Smrg "TES", 17827ec681f3Smrg JM()->mpCurrentModule); 17837ec681f3Smrg 17847ec681f3Smrg#if LLVM_VERSION_MAJOR < 5 17857ec681f3Smrg AttributeSet attrSet = AttributeSet::get( 17867ec681f3Smrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 17877ec681f3Smrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 17887ec681f3Smrg#else 17897ec681f3Smrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 17907ec681f3Smrg#endif 17917ec681f3Smrg 17927ec681f3Smrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 17937ec681f3Smrg IRB()->SetInsertPoint(block); 17947ec681f3Smrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 17957ec681f3Smrg 17967ec681f3Smrg auto argitr = pFunction->arg_begin(); 17977ec681f3Smrg Value *hPrivateData = &*argitr++; 17987ec681f3Smrg hPrivateData->setName("hPrivateData"); 17997ec681f3Smrg Value *pWorkerData = &*argitr++; 18007ec681f3Smrg pWorkerData->setName("pWorkerData"); 18017ec681f3Smrg Value *pTesCtx = &*argitr++; 18027ec681f3Smrg pTesCtx->setName("tesCtx"); 18037ec681f3Smrg 18047ec681f3Smrg Value *consts_ptr = 18057ec681f3Smrg GEP(hPrivateData, {C(0), C(swr_draw_context_constantTES)}); 18067ec681f3Smrg consts_ptr->setName("tes_constants"); 18077ec681f3Smrg Value *const_sizes_ptr = 18087ec681f3Smrg GEP(hPrivateData, {0, swr_draw_context_num_constantsTES}); 18097ec681f3Smrg const_sizes_ptr->setName("num_tes_constants"); 18107ec681f3Smrg 18117ec681f3Smrg struct lp_build_sampler_soa *sampler = 18127ec681f3Smrg swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_EVAL); 18137ec681f3Smrg assert(sampler != nullptr); 18147ec681f3Smrg 18157ec681f3Smrg struct lp_bld_tgsi_system_values system_values; 18167ec681f3Smrg memset(&system_values, 0, sizeof(system_values)); 181701e04c3fSmrg 18187ec681f3Smrg // Load and calculate system values 18197ec681f3Smrg // Tessellation coordinates (gl_TessCoord) 18207ec681f3Smrg Value *vecOffset = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}, "vecOffset"); 18217ec681f3Smrg Value *vecStride = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorStride}, "vecStride"); 18227ec681f3Smrg Value *vecIndex = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}); 18237ec681f3Smrg 18247ec681f3Smrg Value* tess_coord = ALLOCA(ArrayType::get(mSimdFP32Ty, 3)); 18257ec681f3Smrg 18267ec681f3Smrg Value *tessCoordU = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainU}), {vecIndex}, "tessCoordU"); 18277ec681f3Smrg STORE(tessCoordU, tess_coord, {0, 0}); 18287ec681f3Smrg Value *tessCoordV = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainV}), {vecIndex}, "tessCoordV"); 18297ec681f3Smrg STORE(tessCoordV, tess_coord, {0, 1}); 18307ec681f3Smrg Value *tessCoordW = FSUB(FSUB(VIMMED1(1.0f), tessCoordU), tessCoordV, "tessCoordW"); 18317ec681f3Smrg STORE(tessCoordW, tess_coord, {0, 2}); 18327ec681f3Smrg system_values.tess_coord = wrap(tess_coord); 18337ec681f3Smrg 18347ec681f3Smrg // Primitive ID 18357ec681f3Smrg system_values.prim_id = wrap(VBROADCAST(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_PrimitiveID}), "PrimitiveID")); 18367ec681f3Smrg 18377ec681f3Smrg // Tessellation factors 18387ec681f3Smrg Value* pPatch = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}); 18397ec681f3Smrg Value* pTessFactors = GEP(pPatch, {C(0), C(ScalarPatch_tessFactors)}); 18407ec681f3Smrg 18417ec681f3Smrg assert(SWR_NUM_OUTER_TESS_FACTORS == 4); 18427ec681f3Smrg Value* sys_value_outer_factors = UndefValue::get(getVectorType(mFP32Ty, 4)); 18437ec681f3Smrg for (unsigned i = 0; i < SWR_NUM_OUTER_TESS_FACTORS; i++) { 18447ec681f3Smrg Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors, i}); 18457ec681f3Smrg sys_value_outer_factors = VINSERT(sys_value_outer_factors, v, i, "gl_TessLevelOuter"); 18467ec681f3Smrg } 18477ec681f3Smrg system_values.tess_outer = wrap(sys_value_outer_factors); 184801e04c3fSmrg 18497ec681f3Smrg assert(SWR_NUM_INNER_TESS_FACTORS == 2); 18507ec681f3Smrg Value* sys_value_inner_factors = UndefValue::get(getVectorType(mFP32Ty, 4)); 18517ec681f3Smrg for (unsigned i = 0; i < SWR_NUM_INNER_TESS_FACTORS; i++) { 18527ec681f3Smrg Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors, i}); 18537ec681f3Smrg sys_value_inner_factors = VINSERT(sys_value_inner_factors, v, i, "gl_TessLevelInner"); 18547ec681f3Smrg } 18557ec681f3Smrg system_values.tess_inner = wrap(sys_value_inner_factors); 185601e04c3fSmrg 18577ec681f3Smrg if (verbose_shader) 18587ec681f3Smrg { 18597ec681f3Smrg lp_build_print_value(gallivm, "tess_coord = ", system_values.tess_coord); 18607ec681f3Smrg } 186101e04c3fSmrg 18627ec681f3Smrg struct tgsi_shader_info *pPrevShader = nullptr; 186301e04c3fSmrg 18647ec681f3Smrg if (ctx->tcs) { 18657ec681f3Smrg pPrevShader = &ctx->tcs->info.base; 18667ec681f3Smrg } 18677ec681f3Smrg else { 18687ec681f3Smrg pPrevShader = &ctx->vs->info.base; 18697ec681f3Smrg } 187001e04c3fSmrg 18717ec681f3Smrg // Figure out how many per-patch attributes we have 18727ec681f3Smrg unsigned perPatchAttrs = 0; 18737ec681f3Smrg unsigned genericAttrs = 0; 18747ec681f3Smrg unsigned tessLevelAttrs = 0; 18757ec681f3Smrg unsigned sgvAttrs = 0; 18767ec681f3Smrg for (unsigned slot = 0; slot < pPrevShader->num_outputs; slot++) { 18777ec681f3Smrg switch (pPrevShader->output_semantic_name[slot]) { 18787ec681f3Smrg case TGSI_SEMANTIC_PATCH: 18797ec681f3Smrg perPatchAttrs++; 18807ec681f3Smrg break; 18817ec681f3Smrg case TGSI_SEMANTIC_GENERIC: 18827ec681f3Smrg genericAttrs++; 18837ec681f3Smrg break; 18847ec681f3Smrg case TGSI_SEMANTIC_TESSINNER: 18857ec681f3Smrg case TGSI_SEMANTIC_TESSOUTER: 18867ec681f3Smrg tessLevelAttrs++; 18877ec681f3Smrg break; 18887ec681f3Smrg case TGSI_SEMANTIC_POSITION: 18897ec681f3Smrg case TGSI_SEMANTIC_CLIPDIST: 18907ec681f3Smrg case TGSI_SEMANTIC_PSIZE: 18917ec681f3Smrg sgvAttrs++; 18927ec681f3Smrg break; 18937ec681f3Smrg default: 18947ec681f3Smrg assert(!"Unknown semantic input in TES"); 18957ec681f3Smrg } 18967ec681f3Smrg } 189701e04c3fSmrg 18987ec681f3Smrg std::vector<Constant *> mapConstants; 18997ec681f3Smrg Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 19007ec681f3Smrg Value *patchAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 19017ec681f3Smrg for (unsigned slot = 0; slot < info->num_inputs; slot++) { 19027ec681f3Smrg ubyte semantic_name = info->input_semantic_name[slot]; 19037ec681f3Smrg ubyte semantic_idx = info->input_semantic_index[slot]; 190401e04c3fSmrg 19057ec681f3Smrg // Where in TCS output is my attribute? 19067ec681f3Smrg // TESS_TODO: revisit after implement pass-through TCS 19077ec681f3Smrg unsigned tcs_slot = locate_linkage(semantic_name, semantic_idx, pPrevShader); 19087ec681f3Smrg assert(tcs_slot < PIPE_MAX_SHADER_OUTPUTS); 190901e04c3fSmrg 19107ec681f3Smrg // Skip tessellation levels - these go to the tessellator, not TES 19117ec681f3Smrg switch (semantic_name) { 19127ec681f3Smrg case TGSI_SEMANTIC_GENERIC: 19137ec681f3Smrg tcs_slot = tcs_slot + VERTEX_ATTRIB_START_SLOT - sgvAttrs - tessLevelAttrs; 19147ec681f3Smrg break; 19157ec681f3Smrg case TGSI_SEMANTIC_PATCH: 19167ec681f3Smrg tcs_slot = semantic_idx; 19177ec681f3Smrg break; 19187ec681f3Smrg case TGSI_SEMANTIC_POSITION: 19197ec681f3Smrg tcs_slot = VERTEX_POSITION_SLOT; 19207ec681f3Smrg break; 19217ec681f3Smrg case TGSI_SEMANTIC_CLIPDIST: 19227ec681f3Smrg case TGSI_SEMANTIC_PSIZE: 19237ec681f3Smrg break; 19247ec681f3Smrg default: 19257ec681f3Smrg assert(!"Unexpected semantic found while building TES input map"); 19267ec681f3Smrg } 19277ec681f3Smrg if (semantic_name == TGSI_SEMANTIC_PATCH) { 19287ec681f3Smrg STORE(C(tcs_slot), patchAttribMap, {0, slot}); 19297ec681f3Smrg } else { 19307ec681f3Smrg STORE(C(tcs_slot), vtxAttribMap, {0, slot}); 19317ec681f3Smrg } 19327ec681f3Smrg mapConstants.push_back(C(tcs_slot)); 19337ec681f3Smrg } 193401e04c3fSmrg 19357ec681f3Smrg // Build execution mask 19367ec681f3Smrg struct lp_build_mask_context mask; 19377ec681f3Smrg Value *mask_val = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_mask}, "tesMask"); 193801e04c3fSmrg 19397ec681f3Smrg if (verbose_shader) 19407ec681f3Smrg lp_build_print_value(gallivm, "TES execution mask: ", wrap(mask_val)); 194101e04c3fSmrg 19427ec681f3Smrg lp_build_mask_begin(&mask, gallivm, 19437ec681f3Smrg lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 194401e04c3fSmrg 19457ec681f3Smrg struct swr_tes_llvm_iface tes_iface; 19467ec681f3Smrg 19477ec681f3Smrg tes_iface.base.fetch_vertex_input = ::swr_tes_llvm_fetch_vtx_input; 19487ec681f3Smrg tes_iface.base.fetch_patch_input = ::swr_tes_llvm_fetch_patch_input; 19497ec681f3Smrg 19507ec681f3Smrg tes_iface.pBuilder = this; 19517ec681f3Smrg tes_iface.pTesCtx = pTesCtx; 19527ec681f3Smrg tes_iface.pTsState = pTS; 19537ec681f3Smrg tes_iface.num_outputs = tes->info.base.num_outputs; 19547ec681f3Smrg tes_iface.info = info; 19557ec681f3Smrg tes_iface.pVtxAttribMap = vtxAttribMap; 19567ec681f3Smrg tes_iface.pPatchAttribMap = patchAttribMap; 19577ec681f3Smrg 19587ec681f3Smrg struct lp_build_tgsi_params params; 19597ec681f3Smrg memset(¶ms, 0, sizeof(params)); 19607ec681f3Smrg params.type = lp_type_float_vec(32, 32 * 8); 19617ec681f3Smrg params.mask = & mask; 19627ec681f3Smrg params.consts_ptr = wrap(consts_ptr); 19637ec681f3Smrg params.const_sizes_ptr = wrap(const_sizes_ptr); 19647ec681f3Smrg params.system_values = &system_values; 19657ec681f3Smrg params.inputs = inputs; 19667ec681f3Smrg params.context_ptr = wrap(hPrivateData); 19677ec681f3Smrg params.sampler = sampler; 19687ec681f3Smrg params.info = &tes->info.base; 19697ec681f3Smrg params.tes_iface = &tes_iface.base; 19707ec681f3Smrg 19717ec681f3Smrg // Build LLVM IR 19727ec681f3Smrg lp_build_tgsi_soa(gallivm, 19737ec681f3Smrg tes->pipe.tokens, 19747ec681f3Smrg ¶ms, 19757ec681f3Smrg outputs); 197601e04c3fSmrg 19777ec681f3Smrg lp_build_mask_end(&mask); 197801e04c3fSmrg 19797ec681f3Smrg sampler->destroy(sampler); 198001e04c3fSmrg 19817ec681f3Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 198201e04c3fSmrg 19837ec681f3Smrg // Write output attributes 19847ec681f3Smrg Value *dclOut = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pOutputData}, "dclOut"); 198501e04c3fSmrg 19867ec681f3Smrg for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { 19877ec681f3Smrg for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 19887ec681f3Smrg if (!outputs[attrib][channel]) 19897ec681f3Smrg continue; 199001e04c3fSmrg 19917ec681f3Smrg Value *val = LOAD(unwrap(outputs[attrib][channel]));; 19927ec681f3Smrg Value *attribOffset = 19937ec681f3Smrg LOAD(pTesCtx, {0, SWR_DS_CONTEXT_outVertexAttribOffset}); 19947ec681f3Smrg 19957ec681f3Smrg // Assume we write possition 19967ec681f3Smrg Value* outputSlot = C(VERTEX_POSITION_SLOT); 19977ec681f3Smrg if (tes->info.base.output_semantic_name[attrib] != TGSI_SEMANTIC_POSITION) { 19987ec681f3Smrg // No, it's a generic attribute, not a position - let's calculate output slot 19997ec681f3Smrg uint32_t outSlot = attrib; 20007ec681f3Smrg if (tes->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 20017ec681f3Smrg // this shader will write position, so in shader's term 20027ec681f3Smrg // output starts at attrib 1, but we will handle that separately, 20037ec681f3Smrg // so let's fix the outSlot 20047ec681f3Smrg outSlot--; 20057ec681f3Smrg } 20067ec681f3Smrg outputSlot = ADD(attribOffset, C(outSlot)); 20077ec681f3Smrg } 200801e04c3fSmrg 20097ec681f3Smrg Value *attribVecIndex = 20107ec681f3Smrg ADD(MUL(vecStride, MUL(outputSlot, C(4))), vecOffset); 201101e04c3fSmrg 20127ec681f3Smrg uint32_t outputComponent = 0; 20137ec681f3Smrg uint32_t curComp = outputComponent + channel; 20147ec681f3Smrg auto outValIndex = ADD(attribVecIndex, MUL(vecStride, C(curComp))); 20157ec681f3Smrg STOREV(val, dclOut, {outValIndex}); 201601e04c3fSmrg 20177ec681f3Smrg if (verbose_shader) { 20187ec681f3Smrg lp_build_printf(gallivm, 20197ec681f3Smrg "TES output [%d][%d]", 20207ec681f3Smrg C(attrib), 20217ec681f3Smrg C(channel)); 20227ec681f3Smrg lp_build_print_value(gallivm, " = ", wrap(val)); 20237ec681f3Smrg } 20247ec681f3Smrg } 20257ec681f3Smrg } 202601e04c3fSmrg 20277ec681f3Smrg RET_VOID(); 202801e04c3fSmrg 20297ec681f3Smrg JM()->DumpToFile(pFunction, "src"); 20307ec681f3Smrg gallivm_verify_function(gallivm, wrap(pFunction)); 203101e04c3fSmrg 20327ec681f3Smrg gallivm_compile_module(gallivm); 20337ec681f3Smrg JM()->DumpToFile(pFunction, "optimized"); 203401e04c3fSmrg 20357ec681f3Smrg PFN_TES_FUNC pFunc = 20367ec681f3Smrg (PFN_TES_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 203701e04c3fSmrg 20387ec681f3Smrg debug_printf("tess evaluation shader %p\n", pFunc); 20397ec681f3Smrg assert(pFunc && "Error: TessEvaluationShader = NULL"); 204001e04c3fSmrg 20417ec681f3Smrg JM()->DumpAsm(pFunction, "asm"); 204201e04c3fSmrg 20437ec681f3Smrg JM()->mIsModuleFinalized = true; 204401e04c3fSmrg 20457ec681f3Smrg return pFunc; 20467ec681f3Smrg} 204701e04c3fSmrg 20487ec681f3SmrgPFN_TCS_FUNC 20497ec681f3SmrgBuilderSWR::CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key) 20507ec681f3Smrg{ 20517ec681f3Smrg SWR_TS_STATE *pTS = &ctx->tsState; 20527ec681f3Smrg struct tgsi_shader_info *info = &ctx->tcs->info.base; 205301e04c3fSmrg 20547ec681f3Smrg pTS->numHsInputAttribs = info->num_inputs; 20557ec681f3Smrg pTS->numHsOutputAttribs = info->num_outputs; 205601e04c3fSmrg 20577ec681f3Smrg pTS->hsAllocationSize = sizeof(ScalarPatch); 205801e04c3fSmrg 20597ec681f3Smrg pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 20607ec681f3Smrg pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT; 206101e04c3fSmrg 20627ec681f3Smrg struct swr_tess_control_shader *tcs = ctx->tcs; 206301e04c3fSmrg 206401e04c3fSmrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 206501e04c3fSmrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 206601e04c3fSmrg 206701e04c3fSmrg memset(outputs, 0, sizeof(outputs)); 206801e04c3fSmrg 206901e04c3fSmrg AttrBuilder attrBuilder; 207001e04c3fSmrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 207101e04c3fSmrg 20727ec681f3Smrg std::vector<Type *> tcsArgs{ 20737ec681f3Smrg PointerType::get(Gen_swr_draw_context(JM()), 0), 20747ec681f3Smrg PointerType::get(mInt8Ty, 0), 20757ec681f3Smrg PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)}; 20767ec681f3Smrg FunctionType *tcsFuncType = 20777ec681f3Smrg FunctionType::get(Type::getVoidTy(JM()->mContext), tcsArgs, false); 207801e04c3fSmrg 207901e04c3fSmrg // create new vertex shader function 20807ec681f3Smrg auto pFunction = Function::Create(tcsFuncType, 208101e04c3fSmrg GlobalValue::ExternalLinkage, 20827ec681f3Smrg "TCS", 208301e04c3fSmrg JM()->mpCurrentModule); 20847ec681f3Smrg 20857ec681f3Smrg#if LLVM_VERSION_MAJOR < 5 208601e04c3fSmrg AttributeSet attrSet = AttributeSet::get( 208701e04c3fSmrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 208801e04c3fSmrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 208901e04c3fSmrg#else 209001e04c3fSmrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 209101e04c3fSmrg#endif 209201e04c3fSmrg 209301e04c3fSmrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 209401e04c3fSmrg IRB()->SetInsertPoint(block); 209501e04c3fSmrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 209601e04c3fSmrg 209701e04c3fSmrg auto argitr = pFunction->arg_begin(); 209801e04c3fSmrg Value *hPrivateData = &*argitr++; 209901e04c3fSmrg hPrivateData->setName("hPrivateData"); 210001e04c3fSmrg Value *pWorkerData = &*argitr++; 210101e04c3fSmrg pWorkerData->setName("pWorkerData"); 21027ec681f3Smrg Value *pTcsCtx = &*argitr++; 21037ec681f3Smrg pTcsCtx->setName("tcsCtx"); 210401e04c3fSmrg 210501e04c3fSmrg Value *consts_ptr = 21067ec681f3Smrg GEP(hPrivateData, {C(0), C(swr_draw_context_constantTCS)}); 21077ec681f3Smrg consts_ptr->setName("tcs_constants"); 210801e04c3fSmrg Value *const_sizes_ptr = 21097ec681f3Smrg GEP(hPrivateData, {0, swr_draw_context_num_constantsTCS}); 21107ec681f3Smrg const_sizes_ptr->setName("num_tcs_constants"); 211101e04c3fSmrg 211201e04c3fSmrg struct lp_build_sampler_soa *sampler = 21137ec681f3Smrg swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_CTRL); 21147ec681f3Smrg assert(sampler != nullptr); 211501e04c3fSmrg 211601e04c3fSmrg struct lp_bld_tgsi_system_values system_values; 211701e04c3fSmrg memset(&system_values, 0, sizeof(system_values)); 211801e04c3fSmrg 21197ec681f3Smrg system_values.prim_id = 21207ec681f3Smrg wrap(LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_PrimitiveID})); 21217ec681f3Smrg 21227ec681f3Smrg system_values.invocation_id = wrap(VBROADCAST(C(0))); 21237ec681f3Smrg system_values.vertices_in = wrap(C(tcs->vertices_per_patch)); 21247ec681f3Smrg 21257ec681f3Smrg if (verbose_shader) { 21267ec681f3Smrg lp_build_print_value(gallivm, "TCS::prim_id = ", system_values.prim_id); 21277ec681f3Smrg lp_build_print_value(gallivm, "TCS::invocation_id = ", system_values.invocation_id); 21287ec681f3Smrg lp_build_print_value(gallivm, "TCS::vertices_in = ", system_values.vertices_in); 21297ec681f3Smrg } 21307ec681f3Smrg 21317ec681f3Smrg std::vector<Constant *> mapConstants; 21327ec681f3Smrg Value *vtxAttribMap = 21337ec681f3Smrg ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 21347ec681f3Smrg 213501e04c3fSmrg for (unsigned slot = 0; slot < info->num_inputs; slot++) { 213601e04c3fSmrg ubyte semantic_name = info->input_semantic_name[slot]; 213701e04c3fSmrg ubyte semantic_idx = info->input_semantic_index[slot]; 213801e04c3fSmrg 21397ec681f3Smrg unsigned vs_slot = 21407ec681f3Smrg locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); 21417ec681f3Smrg assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS); 214201e04c3fSmrg 214301e04c3fSmrg vs_slot += VERTEX_ATTRIB_START_SLOT; 214401e04c3fSmrg 21457ec681f3Smrg if (ctx->vs->info.base.output_semantic_name[0] 21467ec681f3Smrg == TGSI_SEMANTIC_POSITION) 214701e04c3fSmrg vs_slot--; 214801e04c3fSmrg 214901e04c3fSmrg if (semantic_name == TGSI_SEMANTIC_POSITION) 215001e04c3fSmrg vs_slot = VERTEX_POSITION_SLOT; 215101e04c3fSmrg 215201e04c3fSmrg STORE(C(vs_slot), vtxAttribMap, {0, slot}); 215301e04c3fSmrg mapConstants.push_back(C(vs_slot)); 215401e04c3fSmrg } 215501e04c3fSmrg 21567ec681f3Smrg // Prepare map of output attributes. Needed when shader instance wants 21577ec681f3Smrg // to read own output or output of other instance, which is allowed in TCS 21587ec681f3Smrg Value *vtxOutputAttribMap = 21597ec681f3Smrg ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 21607ec681f3Smrg // Map for per-patch attributes 21617ec681f3Smrg Value *patchOutputAttribMap = 21627ec681f3Smrg ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 21637ec681f3Smrg for (unsigned slot = 0; slot < info->num_outputs; slot++) { 21647ec681f3Smrg ubyte name = info->output_semantic_name[slot]; 21657ec681f3Smrg int32_t idx = info->output_semantic_index[slot]; 21667ec681f3Smrg if (name == TGSI_SEMANTIC_PATCH) { 21677ec681f3Smrg STORE(C(idx), patchOutputAttribMap, {0, slot}); 21687ec681f3Smrg } else { 21697ec681f3Smrg int32_t target_slot = slot; 21707ec681f3Smrg if (name == TGSI_SEMANTIC_GENERIC) { 21717ec681f3Smrg target_slot += VERTEX_ATTRIB_START_SLOT; 21727ec681f3Smrg } 21737ec681f3Smrg // Now normalize target slot 21747ec681f3Smrg for (ubyte as = 0; as < slot; as++) { 21757ec681f3Smrg ubyte name = info->output_semantic_name[as]; 21767ec681f3Smrg switch (name) { 21777ec681f3Smrg case TGSI_SEMANTIC_TESSOUTER: 21787ec681f3Smrg case TGSI_SEMANTIC_TESSINNER: 21797ec681f3Smrg case TGSI_SEMANTIC_PATCH: 21807ec681f3Smrg case TGSI_SEMANTIC_POSITION: 21817ec681f3Smrg target_slot--; 21827ec681f3Smrg } 21837ec681f3Smrg } 21847ec681f3Smrg if (name == TGSI_SEMANTIC_POSITION) { 21857ec681f3Smrg target_slot = VERTEX_POSITION_SLOT; 21867ec681f3Smrg } 21877ec681f3Smrg STORE(C(target_slot), vtxOutputAttribMap, {0, slot}); 21887ec681f3Smrg mapConstants.push_back(C(target_slot)); 21897ec681f3Smrg } 219001e04c3fSmrg } 219101e04c3fSmrg 21927ec681f3Smrg struct lp_build_mask_context mask; 21937ec681f3Smrg Value *mask_val = LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_mask}, "tcsMask"); 21947ec681f3Smrg lp_build_mask_begin( 21957ec681f3Smrg &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 21967ec681f3Smrg 21977ec681f3Smrg struct swr_tcs_llvm_iface tcs_iface; 21987ec681f3Smrg 21997ec681f3Smrg tcs_iface.base.emit_store_output = ::swr_tcs_llvm_store_output; 22007ec681f3Smrg tcs_iface.base.emit_fetch_input = ::swr_tcs_llvm_fetch_input; 22017ec681f3Smrg tcs_iface.base.emit_fetch_output = ::swr_tcs_llvm_fetch_output; 22027ec681f3Smrg tcs_iface.base.emit_barrier = ::swr_tcs_llvm_emit_barrier; 22037ec681f3Smrg tcs_iface.base.emit_prologue = ::swr_tcs_llvm_emit_prologue; 22047ec681f3Smrg tcs_iface.base.emit_epilogue = ::swr_tcs_llvm_emit_epilogue; 22057ec681f3Smrg 22067ec681f3Smrg tcs_iface.pBuilder = this; 22077ec681f3Smrg tcs_iface.pTcsCtx = pTcsCtx; 22087ec681f3Smrg tcs_iface.pTsState = pTS; 22097ec681f3Smrg tcs_iface.output_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; 22107ec681f3Smrg tcs_iface.info = info; 22117ec681f3Smrg tcs_iface.pVtxAttribMap = vtxAttribMap; 22127ec681f3Smrg tcs_iface.pVtxOutputAttribMap = vtxOutputAttribMap; 22137ec681f3Smrg tcs_iface.pPatchOutputAttribMap = patchOutputAttribMap; 22147ec681f3Smrg 22157ec681f3Smrg struct lp_build_tgsi_params params; 22167ec681f3Smrg memset(¶ms, 0, sizeof(params)); 22177ec681f3Smrg params.type = lp_type_float_vec(32, 32 * 8); 22187ec681f3Smrg params.mask = &mask; 22197ec681f3Smrg params.consts_ptr = wrap(consts_ptr); 22207ec681f3Smrg params.const_sizes_ptr = wrap(const_sizes_ptr); 22217ec681f3Smrg params.system_values = &system_values; 22227ec681f3Smrg params.inputs = inputs; 22237ec681f3Smrg params.context_ptr = wrap(hPrivateData); 22247ec681f3Smrg params.sampler = sampler; 22257ec681f3Smrg params.info = &tcs->info.base; 22267ec681f3Smrg params.tcs_iface = &tcs_iface.base; 22277ec681f3Smrg 22287ec681f3Smrg lp_build_tgsi_soa(gallivm, tcs->pipe.tokens, ¶ms, outputs); 222901e04c3fSmrg 223001e04c3fSmrg lp_build_mask_end(&mask); 223101e04c3fSmrg 223201e04c3fSmrg sampler->destroy(sampler); 223301e04c3fSmrg 223401e04c3fSmrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 223501e04c3fSmrg RET_VOID(); 223601e04c3fSmrg 22377ec681f3Smrg JM()->DumpToFile(pFunction, "src"); 223801e04c3fSmrg gallivm_verify_function(gallivm, wrap(pFunction)); 223901e04c3fSmrg gallivm_compile_module(gallivm); 22407ec681f3Smrg JM()->DumpToFile(pFunction, "optimized"); 224101e04c3fSmrg 22427ec681f3Smrg PFN_TCS_FUNC pFunc = 22437ec681f3Smrg (PFN_TCS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 224401e04c3fSmrg 22457ec681f3Smrg debug_printf("tess control shader %p\n", pFunc); 22467ec681f3Smrg assert(pFunc && "Error: TessControlShader = NULL"); 22477ec681f3Smrg JM()->DumpAsm(pFunction, "asm"); 224801e04c3fSmrg 224901e04c3fSmrg JM()->mIsModuleFinalized = true; 225001e04c3fSmrg 225101e04c3fSmrg return pFunc; 225201e04c3fSmrg} 225301e04c3fSmrg 22547ec681f3Smrg 225501e04c3fSmrgPFN_GS_FUNC 225601e04c3fSmrgswr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key) 225701e04c3fSmrg{ 225801e04c3fSmrg BuilderSWR builder( 225901e04c3fSmrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 226001e04c3fSmrg "GS"); 226101e04c3fSmrg PFN_GS_FUNC func = builder.CompileGS(ctx, key); 226201e04c3fSmrg 22637ec681f3Smrg ctx->gs->map.insert(std::make_pair(key, std::unique_ptr<VariantGS>(new VariantGS(builder.gallivm, func)))); 22647ec681f3Smrg return func; 22657ec681f3Smrg} 22667ec681f3Smrg 22677ec681f3SmrgPFN_TCS_FUNC 22687ec681f3Smrgswr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key) 22697ec681f3Smrg{ 22707ec681f3Smrg BuilderSWR builder( 22717ec681f3Smrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 22727ec681f3Smrg "TCS"); 22737ec681f3Smrg PFN_TCS_FUNC func = builder.CompileTCS(ctx, key); 22747ec681f3Smrg 22757ec681f3Smrg ctx->tcs->map.insert( 22767ec681f3Smrg std::make_pair(key, std::unique_ptr<VariantTCS>(new VariantTCS(builder.gallivm, func)))); 22777ec681f3Smrg 22787ec681f3Smrg return func; 22797ec681f3Smrg} 22807ec681f3Smrg 22817ec681f3SmrgPFN_TES_FUNC 22827ec681f3Smrgswr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key) 22837ec681f3Smrg{ 22847ec681f3Smrg BuilderSWR builder( 22857ec681f3Smrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 22867ec681f3Smrg "TES"); 22877ec681f3Smrg PFN_TES_FUNC func = builder.CompileTES(ctx, key); 22887ec681f3Smrg 22897ec681f3Smrg ctx->tes->map.insert( 22907ec681f3Smrg std::make_pair(key, std::unique_ptr<VariantTES>(new VariantTES(builder.gallivm, func)))); 22917ec681f3Smrg 229201e04c3fSmrg return func; 229301e04c3fSmrg} 229401e04c3fSmrg 229501e04c3fSmrgvoid 229601e04c3fSmrgBuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel) 229701e04c3fSmrg{ 229801e04c3fSmrg#if USE_SIMD16_FRONTEND && !USE_SIMD16_VS 229901e04c3fSmrg // interleave the simdvertex components into the dest simd16vertex 230001e04c3fSmrg // slot16offset = slot8offset * 2 230101e04c3fSmrg // comp16offset = comp8offset * 2 + alternateOffset 230201e04c3fSmrg 230301e04c3fSmrg Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset }); 230401e04c3fSmrg Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } ); 230501e04c3fSmrg STORE(pVal, pOut, {channel * 2}); 230601e04c3fSmrg#else 230701e04c3fSmrg Value *pOut = GEP(pVtxOutput, {0, 0, slot}); 230801e04c3fSmrg STORE(pVal, pOut, {0, channel}); 23097ec681f3Smrg if (verbose_vs_shader) { 23107ec681f3Smrg lp_build_printf(gallivm, "VS: Storing on slot %d, channel %d: ", C(slot), C(channel)); 23117ec681f3Smrg lp_build_print_value(gallivm, "", wrap(pVal)); 23127ec681f3Smrg } 231301e04c3fSmrg#endif 231401e04c3fSmrg} 231501e04c3fSmrg 231601e04c3fSmrgPFN_VERTEX_FUNC 231701e04c3fSmrgBuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) 231801e04c3fSmrg{ 231901e04c3fSmrg struct swr_vertex_shader *swr_vs = ctx->vs; 232001e04c3fSmrg 232101e04c3fSmrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 232201e04c3fSmrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 232301e04c3fSmrg 232401e04c3fSmrg memset(outputs, 0, sizeof(outputs)); 232501e04c3fSmrg 232601e04c3fSmrg AttrBuilder attrBuilder; 232701e04c3fSmrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 232801e04c3fSmrg 232901e04c3fSmrg std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 233001e04c3fSmrg PointerType::get(mInt8Ty, 0), 233101e04c3fSmrg PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; 233201e04c3fSmrg FunctionType *vsFuncType = 233301e04c3fSmrg FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); 233401e04c3fSmrg 233501e04c3fSmrg // create new vertex shader function 233601e04c3fSmrg auto pFunction = Function::Create(vsFuncType, 233701e04c3fSmrg GlobalValue::ExternalLinkage, 233801e04c3fSmrg "VS", 233901e04c3fSmrg JM()->mpCurrentModule); 23407ec681f3Smrg#if LLVM_VERSION_MAJOR < 5 234101e04c3fSmrg AttributeSet attrSet = AttributeSet::get( 234201e04c3fSmrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 234301e04c3fSmrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 234401e04c3fSmrg#else 234501e04c3fSmrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 234601e04c3fSmrg#endif 234701e04c3fSmrg 234801e04c3fSmrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 234901e04c3fSmrg IRB()->SetInsertPoint(block); 235001e04c3fSmrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 235101e04c3fSmrg 235201e04c3fSmrg auto argitr = pFunction->arg_begin(); 235301e04c3fSmrg Value *hPrivateData = &*argitr++; 235401e04c3fSmrg hPrivateData->setName("hPrivateData"); 235501e04c3fSmrg Value *pWorkerData = &*argitr++; 235601e04c3fSmrg pWorkerData->setName("pWorkerData"); 235701e04c3fSmrg Value *pVsCtx = &*argitr++; 235801e04c3fSmrg pVsCtx->setName("vsCtx"); 23597ec681f3Smrg 236001e04c3fSmrg Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)}); 236101e04c3fSmrg 236201e04c3fSmrg consts_ptr->setName("vs_constants"); 236301e04c3fSmrg Value *const_sizes_ptr = 236401e04c3fSmrg GEP(hPrivateData, {0, swr_draw_context_num_constantsVS}); 236501e04c3fSmrg const_sizes_ptr->setName("num_vs_constants"); 236601e04c3fSmrg 236701e04c3fSmrg Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); 236801e04c3fSmrg#if USE_SIMD16_VS 236901e04c3fSmrg vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0)); 237001e04c3fSmrg#endif 237101e04c3fSmrg 237201e04c3fSmrg for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 237301e04c3fSmrg const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; 237401e04c3fSmrg for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 237501e04c3fSmrg if (mask & (1 << channel)) { 237601e04c3fSmrg inputs[attrib][channel] = 237701e04c3fSmrg wrap(LOAD(vtxInput, {0, 0, attrib, channel})); 237801e04c3fSmrg } 237901e04c3fSmrg } 238001e04c3fSmrg } 238101e04c3fSmrg 238201e04c3fSmrg struct lp_build_sampler_soa *sampler = 238301e04c3fSmrg swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX); 23847ec681f3Smrg assert(sampler != nullptr); 238501e04c3fSmrg 238601e04c3fSmrg struct lp_bld_tgsi_system_values system_values; 238701e04c3fSmrg memset(&system_values, 0, sizeof(system_values)); 238801e04c3fSmrg system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); 238901e04c3fSmrg 239001e04c3fSmrg#if USE_SIMD16_VS 239101e04c3fSmrg system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16})); 239201e04c3fSmrg#else 239301e04c3fSmrg system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID})); 239401e04c3fSmrg#endif 239501e04c3fSmrg 239601e04c3fSmrg#if USE_SIMD16_VS 239701e04c3fSmrg uint32_t vectorWidth = mVWidth16; 239801e04c3fSmrg#else 239901e04c3fSmrg uint32_t vectorWidth = mVWidth; 240001e04c3fSmrg#endif 240101e04c3fSmrg 24027ec681f3Smrg struct lp_build_tgsi_params params; 24037ec681f3Smrg memset(¶ms, 0, sizeof(params)); 24047ec681f3Smrg params.type = lp_type_float_vec(32, 32 * vectorWidth); 24057ec681f3Smrg params.consts_ptr = wrap(consts_ptr); 24067ec681f3Smrg params.const_sizes_ptr = wrap(const_sizes_ptr); 24077ec681f3Smrg params.system_values = &system_values; 24087ec681f3Smrg params.inputs = inputs; 24097ec681f3Smrg params.context_ptr = wrap(hPrivateData); 24107ec681f3Smrg params.sampler = sampler; 24117ec681f3Smrg params.info = &swr_vs->info.base; 24127ec681f3Smrg 241301e04c3fSmrg lp_build_tgsi_soa(gallivm, 241401e04c3fSmrg swr_vs->pipe.tokens, 24157ec681f3Smrg ¶ms, 24167ec681f3Smrg outputs); 241701e04c3fSmrg 241801e04c3fSmrg sampler->destroy(sampler); 241901e04c3fSmrg 242001e04c3fSmrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 242101e04c3fSmrg 242201e04c3fSmrg Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); 242301e04c3fSmrg#if USE_SIMD16_VS 242401e04c3fSmrg vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0)); 242501e04c3fSmrg#endif 242601e04c3fSmrg 242701e04c3fSmrg for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 242801e04c3fSmrg for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { 242901e04c3fSmrg if (!outputs[attrib][channel]) 243001e04c3fSmrg continue; 243101e04c3fSmrg 243201e04c3fSmrg Value *val; 243301e04c3fSmrg uint32_t outSlot; 243401e04c3fSmrg 243501e04c3fSmrg if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { 243601e04c3fSmrg if (channel != VERTEX_SGV_POINT_SIZE_COMP) 243701e04c3fSmrg continue; 243801e04c3fSmrg val = LOAD(unwrap(outputs[attrib][0])); 243901e04c3fSmrg outSlot = VERTEX_SGV_SLOT; 244001e04c3fSmrg } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { 244101e04c3fSmrg val = LOAD(unwrap(outputs[attrib][channel])); 244201e04c3fSmrg outSlot = VERTEX_POSITION_SLOT; 244301e04c3fSmrg } else { 244401e04c3fSmrg val = LOAD(unwrap(outputs[attrib][channel])); 244501e04c3fSmrg outSlot = VERTEX_ATTRIB_START_SLOT + attrib; 244601e04c3fSmrg if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) 244701e04c3fSmrg outSlot--; 244801e04c3fSmrg } 244901e04c3fSmrg 245001e04c3fSmrg WriteVS(val, pVsCtx, vtxOutput, outSlot, channel); 245101e04c3fSmrg } 245201e04c3fSmrg } 245301e04c3fSmrg 245401e04c3fSmrg if (ctx->rasterizer->clip_plane_enable || 245501e04c3fSmrg swr_vs->info.base.culldist_writemask) { 245601e04c3fSmrg unsigned clip_mask = ctx->rasterizer->clip_plane_enable; 245701e04c3fSmrg 245801e04c3fSmrg unsigned cv = 0; 245901e04c3fSmrg if (swr_vs->info.base.writes_clipvertex) { 246001e04c3fSmrg cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0, 246101e04c3fSmrg &swr_vs->info.base); 246201e04c3fSmrg } else { 246301e04c3fSmrg for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 246401e04c3fSmrg if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 246501e04c3fSmrg swr_vs->info.base.output_semantic_index[i] == 0) { 246601e04c3fSmrg cv = i; 246701e04c3fSmrg break; 246801e04c3fSmrg } 246901e04c3fSmrg } 247001e04c3fSmrg } 24717ec681f3Smrg assert(cv < PIPE_MAX_SHADER_OUTPUTS); 247201e04c3fSmrg LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], ""); 247301e04c3fSmrg LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], ""); 247401e04c3fSmrg LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], ""); 247501e04c3fSmrg LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], ""); 247601e04c3fSmrg 24777ec681f3Smrg tgsi_shader_info *pLastFE = &ctx->vs->info.base; 24787ec681f3Smrg 24797ec681f3Smrg if (ctx->gs) { 24807ec681f3Smrg pLastFE = &ctx->gs->info.base; 24817ec681f3Smrg } 24827ec681f3Smrg else if (ctx->tes) { 24837ec681f3Smrg pLastFE = &ctx->tes->info.base; 24847ec681f3Smrg } 24857ec681f3Smrg else if (ctx->tcs) { 24867ec681f3Smrg pLastFE = &ctx->tcs->info.base; 24877ec681f3Smrg } 24887ec681f3Smrg 248901e04c3fSmrg for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) { 249001e04c3fSmrg // clip distance overrides user clip planes 24917ec681f3Smrg if ((pLastFE->clipdist_writemask & clip_mask & (1 << val)) || 24927ec681f3Smrg ((pLastFE->culldist_writemask << pLastFE->num_written_clipdistance) & (1 << val))) { 24937ec681f3Smrg unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, pLastFE); 24947ec681f3Smrg assert(cv < PIPE_MAX_SHADER_OUTPUTS); 249501e04c3fSmrg if (val < 4) { 249601e04c3fSmrg LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], ""); 249701e04c3fSmrg WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val); 249801e04c3fSmrg } else { 249901e04c3fSmrg LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], ""); 250001e04c3fSmrg WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4); 250101e04c3fSmrg } 250201e04c3fSmrg continue; 250301e04c3fSmrg } 250401e04c3fSmrg 250501e04c3fSmrg if (!(clip_mask & (1 << val))) 250601e04c3fSmrg continue; 250701e04c3fSmrg 250801e04c3fSmrg Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0})); 250901e04c3fSmrg Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1})); 251001e04c3fSmrg Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2})); 251101e04c3fSmrg Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3})); 251201e04c3fSmrg#if USE_SIMD16_VS 251301e04c3fSmrg Value *bpx = VBROADCAST_16(px); 251401e04c3fSmrg Value *bpy = VBROADCAST_16(py); 251501e04c3fSmrg Value *bpz = VBROADCAST_16(pz); 251601e04c3fSmrg Value *bpw = VBROADCAST_16(pw); 251701e04c3fSmrg#else 251801e04c3fSmrg Value *bpx = VBROADCAST(px); 251901e04c3fSmrg Value *bpy = VBROADCAST(py); 252001e04c3fSmrg Value *bpz = VBROADCAST(pz); 252101e04c3fSmrg Value *bpw = VBROADCAST(pw); 252201e04c3fSmrg#endif 252301e04c3fSmrg Value *dist = FADD(FMUL(unwrap(cx), bpx), 252401e04c3fSmrg FADD(FMUL(unwrap(cy), bpy), 252501e04c3fSmrg FADD(FMUL(unwrap(cz), bpz), 252601e04c3fSmrg FMUL(unwrap(cw), bpw)))); 252701e04c3fSmrg 252801e04c3fSmrg if (val < 4) 252901e04c3fSmrg WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val); 253001e04c3fSmrg else 253101e04c3fSmrg WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4); 253201e04c3fSmrg } 253301e04c3fSmrg } 253401e04c3fSmrg 253501e04c3fSmrg RET_VOID(); 253601e04c3fSmrg 25377ec681f3Smrg JM()->DumpToFile(pFunction, "vs_function1"); 253801e04c3fSmrg gallivm_verify_function(gallivm, wrap(pFunction)); 253901e04c3fSmrg gallivm_compile_module(gallivm); 25407ec681f3Smrg JM()->DumpToFile(pFunction, "vs_function2"); 254101e04c3fSmrg 254201e04c3fSmrg // lp_debug_dump_value(func); 254301e04c3fSmrg 254401e04c3fSmrg PFN_VERTEX_FUNC pFunc = 254501e04c3fSmrg (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 254601e04c3fSmrg 25477ec681f3Smrg JM()->DumpAsm(pFunction, "vs_function_asm"); 254801e04c3fSmrg debug_printf("vert shader %p\n", pFunc); 254901e04c3fSmrg assert(pFunc && "Error: VertShader = NULL"); 255001e04c3fSmrg 255101e04c3fSmrg JM()->mIsModuleFinalized = true; 255201e04c3fSmrg 255301e04c3fSmrg return pFunc; 255401e04c3fSmrg} 255501e04c3fSmrg 255601e04c3fSmrgPFN_VERTEX_FUNC 255701e04c3fSmrgswr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key) 255801e04c3fSmrg{ 255901e04c3fSmrg if (!ctx->vs->pipe.tokens) 256001e04c3fSmrg return NULL; 256101e04c3fSmrg 256201e04c3fSmrg BuilderSWR builder( 256301e04c3fSmrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 256401e04c3fSmrg "VS"); 256501e04c3fSmrg PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key); 256601e04c3fSmrg 25677ec681f3Smrg ctx->vs->map.insert(std::make_pair(key, std::unique_ptr<VariantVS>(new VariantVS(builder.gallivm, func)))); 256801e04c3fSmrg return func; 256901e04c3fSmrg} 257001e04c3fSmrg 257101e04c3fSmrgunsigned 257201e04c3fSmrgswr_so_adjust_attrib(unsigned in_attrib, 257301e04c3fSmrg swr_vertex_shader *swr_vs) 257401e04c3fSmrg{ 257501e04c3fSmrg ubyte semantic_name; 257601e04c3fSmrg unsigned attrib; 257701e04c3fSmrg 257801e04c3fSmrg attrib = in_attrib + VERTEX_ATTRIB_START_SLOT; 257901e04c3fSmrg 258001e04c3fSmrg if (swr_vs) { 258101e04c3fSmrg semantic_name = swr_vs->info.base.output_semantic_name[in_attrib]; 258201e04c3fSmrg if (semantic_name == TGSI_SEMANTIC_POSITION) { 258301e04c3fSmrg attrib = VERTEX_POSITION_SLOT; 258401e04c3fSmrg } else if (semantic_name == TGSI_SEMANTIC_PSIZE) { 258501e04c3fSmrg attrib = VERTEX_SGV_SLOT; 258601e04c3fSmrg } else if (semantic_name == TGSI_SEMANTIC_LAYER) { 258701e04c3fSmrg attrib = VERTEX_SGV_SLOT; 258801e04c3fSmrg } else { 258901e04c3fSmrg if (swr_vs->info.base.writes_position) { 259001e04c3fSmrg attrib--; 259101e04c3fSmrg } 259201e04c3fSmrg } 259301e04c3fSmrg } 259401e04c3fSmrg 259501e04c3fSmrg return attrib; 259601e04c3fSmrg} 259701e04c3fSmrg 259801e04c3fSmrgstatic unsigned 259901e04c3fSmrglocate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) 260001e04c3fSmrg{ 260101e04c3fSmrg for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 260201e04c3fSmrg if ((info->output_semantic_name[i] == name) 260301e04c3fSmrg && (info->output_semantic_index[i] == index)) { 260401e04c3fSmrg return i; 260501e04c3fSmrg } 260601e04c3fSmrg } 260701e04c3fSmrg 260801e04c3fSmrg return 0xFFFFFFFF; 260901e04c3fSmrg} 261001e04c3fSmrg 261101e04c3fSmrgPFN_PIXEL_KERNEL 261201e04c3fSmrgBuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) 261301e04c3fSmrg{ 261401e04c3fSmrg struct swr_fragment_shader *swr_fs = ctx->fs; 261501e04c3fSmrg 261601e04c3fSmrg struct tgsi_shader_info *pPrevShader; 261701e04c3fSmrg if (ctx->gs) 261801e04c3fSmrg pPrevShader = &ctx->gs->info.base; 26197ec681f3Smrg else if (ctx->tes) 26207ec681f3Smrg pPrevShader = &ctx->tes->info.base; 262101e04c3fSmrg else 262201e04c3fSmrg pPrevShader = &ctx->vs->info.base; 262301e04c3fSmrg 262401e04c3fSmrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 262501e04c3fSmrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 262601e04c3fSmrg 262701e04c3fSmrg memset(inputs, 0, sizeof(inputs)); 262801e04c3fSmrg memset(outputs, 0, sizeof(outputs)); 262901e04c3fSmrg 263001e04c3fSmrg struct lp_build_sampler_soa *sampler = NULL; 263101e04c3fSmrg 263201e04c3fSmrg AttrBuilder attrBuilder; 263301e04c3fSmrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 263401e04c3fSmrg 263501e04c3fSmrg std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 263601e04c3fSmrg PointerType::get(mInt8Ty, 0), 263701e04c3fSmrg PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; 263801e04c3fSmrg FunctionType *funcType = 263901e04c3fSmrg FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); 264001e04c3fSmrg 264101e04c3fSmrg auto pFunction = Function::Create(funcType, 264201e04c3fSmrg GlobalValue::ExternalLinkage, 264301e04c3fSmrg "FS", 264401e04c3fSmrg JM()->mpCurrentModule); 26457ec681f3Smrg#if LLVM_VERSION_MAJOR < 5 264601e04c3fSmrg AttributeSet attrSet = AttributeSet::get( 264701e04c3fSmrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 264801e04c3fSmrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 264901e04c3fSmrg#else 265001e04c3fSmrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 265101e04c3fSmrg#endif 265201e04c3fSmrg 265301e04c3fSmrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 265401e04c3fSmrg IRB()->SetInsertPoint(block); 265501e04c3fSmrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 265601e04c3fSmrg 265701e04c3fSmrg auto args = pFunction->arg_begin(); 265801e04c3fSmrg Value *hPrivateData = &*args++; 265901e04c3fSmrg hPrivateData->setName("hPrivateData"); 266001e04c3fSmrg Value *pWorkerData = &*args++; 266101e04c3fSmrg pWorkerData->setName("pWorkerData"); 266201e04c3fSmrg Value *pPS = &*args++; 266301e04c3fSmrg pPS->setName("psCtx"); 266401e04c3fSmrg 266501e04c3fSmrg Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS}); 266601e04c3fSmrg consts_ptr->setName("fs_constants"); 266701e04c3fSmrg Value *const_sizes_ptr = 266801e04c3fSmrg GEP(hPrivateData, {0, swr_draw_context_num_constantsFS}); 266901e04c3fSmrg const_sizes_ptr->setName("num_fs_constants"); 267001e04c3fSmrg 267101e04c3fSmrg // load *pAttribs, *pPerspAttribs 267201e04c3fSmrg Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs"); 267301e04c3fSmrg Value *pPerspAttribs = 267401e04c3fSmrg LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs"); 267501e04c3fSmrg 267601e04c3fSmrg swr_fs->constantMask = 0; 267701e04c3fSmrg swr_fs->flatConstantMask = 0; 267801e04c3fSmrg swr_fs->pointSpriteMask = 0; 267901e04c3fSmrg 268001e04c3fSmrg for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 268101e04c3fSmrg const unsigned mask = swr_fs->info.base.input_usage_mask[attrib]; 268201e04c3fSmrg const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib]; 268301e04c3fSmrg const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib]; 268401e04c3fSmrg 268501e04c3fSmrg if (!mask) 268601e04c3fSmrg continue; 268701e04c3fSmrg 268801e04c3fSmrg // load i,j 268901e04c3fSmrg Value *vi = nullptr, *vj = nullptr; 269001e04c3fSmrg switch (interpLoc) { 269101e04c3fSmrg case TGSI_INTERPOLATE_LOC_CENTER: 269201e04c3fSmrg vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i"); 269301e04c3fSmrg vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j"); 269401e04c3fSmrg break; 269501e04c3fSmrg case TGSI_INTERPOLATE_LOC_CENTROID: 269601e04c3fSmrg vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i"); 269701e04c3fSmrg vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j"); 269801e04c3fSmrg break; 269901e04c3fSmrg case TGSI_INTERPOLATE_LOC_SAMPLE: 270001e04c3fSmrg vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i"); 270101e04c3fSmrg vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j"); 270201e04c3fSmrg break; 270301e04c3fSmrg } 270401e04c3fSmrg 270501e04c3fSmrg // load/compute w 270601e04c3fSmrg Value *vw = nullptr, *pAttribs; 270701e04c3fSmrg if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 270801e04c3fSmrg interpMode == TGSI_INTERPOLATE_COLOR) { 270901e04c3fSmrg pAttribs = pPerspAttribs; 271001e04c3fSmrg switch (interpLoc) { 271101e04c3fSmrg case TGSI_INTERPOLATE_LOC_CENTER: 271201e04c3fSmrg vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center})); 271301e04c3fSmrg break; 271401e04c3fSmrg case TGSI_INTERPOLATE_LOC_CENTROID: 271501e04c3fSmrg vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid})); 271601e04c3fSmrg break; 271701e04c3fSmrg case TGSI_INTERPOLATE_LOC_SAMPLE: 271801e04c3fSmrg vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample})); 271901e04c3fSmrg break; 272001e04c3fSmrg } 272101e04c3fSmrg } else { 272201e04c3fSmrg pAttribs = pRawAttribs; 272301e04c3fSmrg vw = VIMMED1(1.f); 272401e04c3fSmrg } 272501e04c3fSmrg 272601e04c3fSmrg vw->setName("w"); 272701e04c3fSmrg 272801e04c3fSmrg ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib]; 272901e04c3fSmrg ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib]; 273001e04c3fSmrg 273101e04c3fSmrg if (semantic_name == TGSI_SEMANTIC_FACE) { 273201e04c3fSmrg Value *ff = 273301e04c3fSmrg UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty); 273401e04c3fSmrg ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); 273501e04c3fSmrg ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace"); 273601e04c3fSmrg 273701e04c3fSmrg inputs[attrib][0] = wrap(ff); 273801e04c3fSmrg inputs[attrib][1] = wrap(VIMMED1(0.0f)); 273901e04c3fSmrg inputs[attrib][2] = wrap(VIMMED1(0.0f)); 274001e04c3fSmrg inputs[attrib][3] = wrap(VIMMED1(1.0f)); 274101e04c3fSmrg continue; 274201e04c3fSmrg } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord 274301e04c3fSmrg if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] == 274401e04c3fSmrg TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) { 274501e04c3fSmrg inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX")); 274601e04c3fSmrg inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY")); 274701e04c3fSmrg } else { 274801e04c3fSmrg inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX")); 274901e04c3fSmrg inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY")); 275001e04c3fSmrg } 275101e04c3fSmrg inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ")); 275201e04c3fSmrg inputs[attrib][3] = 275301e04c3fSmrg wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW")); 275401e04c3fSmrg continue; 27557ec681f3Smrg } else if (semantic_name == TGSI_SEMANTIC_LAYER) { // gl_Layer 27567ec681f3Smrg Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_renderTargetArrayIndex}); 27577ec681f3Smrg ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vRenderTargetArrayIndex"); 27587ec681f3Smrg inputs[attrib][0] = wrap(ff); 27597ec681f3Smrg inputs[attrib][1] = wrap(VIMMED1(0.0f)); 27607ec681f3Smrg inputs[attrib][2] = wrap(VIMMED1(0.0f)); 27617ec681f3Smrg inputs[attrib][3] = wrap(VIMMED1(0.0f)); 27627ec681f3Smrg continue; 27637ec681f3Smrg } else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { // gl_ViewportIndex 27647ec681f3Smrg Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_viewportIndex}); 27657ec681f3Smrg ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vViewportIndex"); 27667ec681f3Smrg inputs[attrib][0] = wrap(ff); 27677ec681f3Smrg inputs[attrib][1] = wrap(VIMMED1(0.0f)); 27687ec681f3Smrg inputs[attrib][2] = wrap(VIMMED1(0.0f)); 27697ec681f3Smrg inputs[attrib][3] = wrap(VIMMED1(0.0f)); 27707ec681f3Smrg continue; 277101e04c3fSmrg } 277201e04c3fSmrg unsigned linkedAttrib = 277301e04c3fSmrg locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1; 277401e04c3fSmrg 277501e04c3fSmrg uint32_t extraAttribs = 0; 277601e04c3fSmrg if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) { 277701e04c3fSmrg /* non-gs generated primID - need to grab from swizzleMap override */ 277801e04c3fSmrg linkedAttrib = pPrevShader->num_outputs - 1; 277901e04c3fSmrg swr_fs->constantMask |= 1 << linkedAttrib; 278001e04c3fSmrg extraAttribs++; 278101e04c3fSmrg } else if (semantic_name == TGSI_SEMANTIC_GENERIC && 278201e04c3fSmrg key.sprite_coord_enable & (1 << semantic_idx)) { 278301e04c3fSmrg /* we add an extra attrib to the backendState in swr_update_derived. */ 278401e04c3fSmrg linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1; 278501e04c3fSmrg swr_fs->pointSpriteMask |= (1 << linkedAttrib); 278601e04c3fSmrg extraAttribs++; 27877ec681f3Smrg } else if (linkedAttrib + 1 == 0xFFFFFFFF) { 278801e04c3fSmrg inputs[attrib][0] = wrap(VIMMED1(0.0f)); 278901e04c3fSmrg inputs[attrib][1] = wrap(VIMMED1(0.0f)); 279001e04c3fSmrg inputs[attrib][2] = wrap(VIMMED1(0.0f)); 279101e04c3fSmrg inputs[attrib][3] = wrap(VIMMED1(1.0f)); 279201e04c3fSmrg /* If we're reading in color and 2-sided lighting is enabled, we have 279301e04c3fSmrg * to keep going. 279401e04c3fSmrg */ 279501e04c3fSmrg if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside) 279601e04c3fSmrg continue; 279701e04c3fSmrg } else { 279801e04c3fSmrg if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 279901e04c3fSmrg swr_fs->constantMask |= 1 << linkedAttrib; 280001e04c3fSmrg } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 280101e04c3fSmrg swr_fs->flatConstantMask |= 1 << linkedAttrib; 280201e04c3fSmrg } 280301e04c3fSmrg } 280401e04c3fSmrg 280501e04c3fSmrg unsigned bcolorAttrib = 0xFFFFFFFF; 280601e04c3fSmrg Value *offset = NULL; 280701e04c3fSmrg if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) { 280801e04c3fSmrg bcolorAttrib = locate_linkage( 28097ec681f3Smrg TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader); 281001e04c3fSmrg /* Neither front nor back colors were available. Nothing to load. */ 281101e04c3fSmrg if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF) 281201e04c3fSmrg continue; 281301e04c3fSmrg /* If there is no front color, just always use the back color. */ 28147ec681f3Smrg if (linkedAttrib + 1 == 0xFFFFFFFF) 281501e04c3fSmrg linkedAttrib = bcolorAttrib; 281601e04c3fSmrg 281701e04c3fSmrg if (bcolorAttrib != 0xFFFFFFFF) { 28187ec681f3Smrg bcolorAttrib -= 1; 281901e04c3fSmrg if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 282001e04c3fSmrg swr_fs->constantMask |= 1 << bcolorAttrib; 282101e04c3fSmrg } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 282201e04c3fSmrg swr_fs->flatConstantMask |= 1 << bcolorAttrib; 282301e04c3fSmrg } 282401e04c3fSmrg 282501e04c3fSmrg unsigned diff = 12 * (bcolorAttrib - linkedAttrib); 282601e04c3fSmrg 282701e04c3fSmrg if (diff) { 282801e04c3fSmrg Value *back = 282901e04c3fSmrg XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace"); 283001e04c3fSmrg 283101e04c3fSmrg offset = MUL(back, C(diff)); 283201e04c3fSmrg offset->setName("offset"); 283301e04c3fSmrg } 283401e04c3fSmrg } 283501e04c3fSmrg } 283601e04c3fSmrg 283701e04c3fSmrg for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 283801e04c3fSmrg if (mask & (1 << channel)) { 283901e04c3fSmrg Value *indexA = C(linkedAttrib * 12 + channel); 284001e04c3fSmrg Value *indexB = C(linkedAttrib * 12 + channel + 4); 284101e04c3fSmrg Value *indexC = C(linkedAttrib * 12 + channel + 8); 284201e04c3fSmrg 284301e04c3fSmrg if (offset) { 284401e04c3fSmrg indexA = ADD(indexA, offset); 284501e04c3fSmrg indexB = ADD(indexB, offset); 284601e04c3fSmrg indexC = ADD(indexC, offset); 284701e04c3fSmrg } 284801e04c3fSmrg 284901e04c3fSmrg Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA))); 285001e04c3fSmrg Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB))); 285101e04c3fSmrg Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC))); 285201e04c3fSmrg 285301e04c3fSmrg if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 285401e04c3fSmrg inputs[attrib][channel] = wrap(va); 285501e04c3fSmrg } else { 285601e04c3fSmrg Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); 285701e04c3fSmrg 285801e04c3fSmrg vc = FMUL(vk, vc); 285901e04c3fSmrg 286001e04c3fSmrg Value *interp = FMUL(va, vi); 286101e04c3fSmrg Value *interp1 = FMUL(vb, vj); 286201e04c3fSmrg interp = FADD(interp, interp1); 286301e04c3fSmrg interp = FADD(interp, vc); 286401e04c3fSmrg if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 286501e04c3fSmrg interpMode == TGSI_INTERPOLATE_COLOR) 286601e04c3fSmrg interp = FMUL(interp, vw); 286701e04c3fSmrg inputs[attrib][channel] = wrap(interp); 286801e04c3fSmrg } 286901e04c3fSmrg } 287001e04c3fSmrg } 287101e04c3fSmrg } 287201e04c3fSmrg 287301e04c3fSmrg sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT); 28747ec681f3Smrg assert(sampler != nullptr); 287501e04c3fSmrg 287601e04c3fSmrg struct lp_bld_tgsi_system_values system_values; 287701e04c3fSmrg memset(&system_values, 0, sizeof(system_values)); 287801e04c3fSmrg 287901e04c3fSmrg struct lp_build_mask_context mask; 288001e04c3fSmrg bool uses_mask = false; 288101e04c3fSmrg 288201e04c3fSmrg if (swr_fs->info.base.uses_kill || 288301e04c3fSmrg key.poly_stipple_enable) { 288401e04c3fSmrg Value *vActiveMask = NULL; 288501e04c3fSmrg if (swr_fs->info.base.uses_kill) { 288601e04c3fSmrg vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask"); 288701e04c3fSmrg } 288801e04c3fSmrg if (key.poly_stipple_enable) { 288901e04c3fSmrg // first get fragment xy coords and clip to stipple bounds 289001e04c3fSmrg Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}); 289101e04c3fSmrg Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}); 289201e04c3fSmrg Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty); 289301e04c3fSmrg Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty); 289401e04c3fSmrg 289501e04c3fSmrg // stipple pattern is 32x32, which means that one line of stipple 289601e04c3fSmrg // is stored in one word: 289701e04c3fSmrg // vXstipple is bit offset inside 32-bit stipple word 289801e04c3fSmrg // vYstipple is word index is stipple array 289901e04c3fSmrg Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1) 290001e04c3fSmrg Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1) 290101e04c3fSmrg 290201e04c3fSmrg // grab stipple pattern base address 290301e04c3fSmrg Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0}); 290401e04c3fSmrg stipplePtr = BITCAST(stipplePtr, mInt8PtrTy); 290501e04c3fSmrg 290601e04c3fSmrg // peform a gather to grab stipple words for each lane 290701e04c3fSmrg Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple, 290801e04c3fSmrg VIMMED1(0xffffffff), 4); 290901e04c3fSmrg 291001e04c3fSmrg // create a mask with one bit corresponding to the x stipple 291101e04c3fSmrg // and AND it with the pattern, to see if we have a bit 291201e04c3fSmrg Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple); 291301e04c3fSmrg Value *vStippleMask = AND(vStipple, vBitMask); 291401e04c3fSmrg vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0)); 291501e04c3fSmrg vStippleMask = VMASK(vStippleMask); 291601e04c3fSmrg 291701e04c3fSmrg if (swr_fs->info.base.uses_kill) { 291801e04c3fSmrg vActiveMask = AND(vActiveMask, vStippleMask); 291901e04c3fSmrg } else { 292001e04c3fSmrg vActiveMask = vStippleMask; 292101e04c3fSmrg } 292201e04c3fSmrg } 292301e04c3fSmrg lp_build_mask_begin( 292401e04c3fSmrg &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask)); 292501e04c3fSmrg uses_mask = true; 292601e04c3fSmrg } 292701e04c3fSmrg 29287ec681f3Smrg struct lp_build_tgsi_params params; 29297ec681f3Smrg memset(¶ms, 0, sizeof(params)); 29307ec681f3Smrg params.type = lp_type_float_vec(32, 32 * 8); 29317ec681f3Smrg params.mask = uses_mask ? &mask : NULL; 29327ec681f3Smrg params.consts_ptr = wrap(consts_ptr); 29337ec681f3Smrg params.const_sizes_ptr = wrap(const_sizes_ptr); 29347ec681f3Smrg params.system_values = &system_values; 29357ec681f3Smrg params.inputs = inputs; 29367ec681f3Smrg params.context_ptr = wrap(hPrivateData); 29377ec681f3Smrg params.sampler = sampler; 29387ec681f3Smrg params.info = &swr_fs->info.base; 29397ec681f3Smrg 294001e04c3fSmrg lp_build_tgsi_soa(gallivm, 294101e04c3fSmrg swr_fs->pipe.tokens, 29427ec681f3Smrg ¶ms, 29437ec681f3Smrg outputs); 294401e04c3fSmrg 294501e04c3fSmrg sampler->destroy(sampler); 294601e04c3fSmrg 294701e04c3fSmrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 294801e04c3fSmrg 294901e04c3fSmrg for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs; 295001e04c3fSmrg attrib++) { 295101e04c3fSmrg switch (swr_fs->info.base.output_semantic_name[attrib]) { 295201e04c3fSmrg case TGSI_SEMANTIC_POSITION: { 295301e04c3fSmrg // write z 295401e04c3fSmrg LLVMValueRef outZ = 295501e04c3fSmrg LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); 295601e04c3fSmrg STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ}); 295701e04c3fSmrg break; 295801e04c3fSmrg } 295901e04c3fSmrg case TGSI_SEMANTIC_COLOR: { 296001e04c3fSmrg for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 296101e04c3fSmrg if (!outputs[attrib][channel]) 296201e04c3fSmrg continue; 296301e04c3fSmrg 296401e04c3fSmrg LLVMValueRef out = 296501e04c3fSmrg LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], ""); 296601e04c3fSmrg if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 296701e04c3fSmrg swr_fs->info.base.output_semantic_index[attrib] == 0) { 296801e04c3fSmrg for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { 296901e04c3fSmrg STORE(unwrap(out), 297001e04c3fSmrg pPS, 297101e04c3fSmrg {0, SWR_PS_CONTEXT_shaded, rt, channel}); 297201e04c3fSmrg } 297301e04c3fSmrg } else { 297401e04c3fSmrg STORE(unwrap(out), 297501e04c3fSmrg pPS, 297601e04c3fSmrg {0, 297701e04c3fSmrg SWR_PS_CONTEXT_shaded, 297801e04c3fSmrg swr_fs->info.base.output_semantic_index[attrib], 297901e04c3fSmrg channel}); 298001e04c3fSmrg } 298101e04c3fSmrg } 298201e04c3fSmrg break; 298301e04c3fSmrg } 298401e04c3fSmrg default: { 298501e04c3fSmrg fprintf(stderr, 298601e04c3fSmrg "unknown output from FS %s[%d]\n", 298701e04c3fSmrg tgsi_semantic_names[swr_fs->info.base 298801e04c3fSmrg .output_semantic_name[attrib]], 298901e04c3fSmrg swr_fs->info.base.output_semantic_index[attrib]); 299001e04c3fSmrg break; 299101e04c3fSmrg } 299201e04c3fSmrg } 299301e04c3fSmrg } 299401e04c3fSmrg 299501e04c3fSmrg LLVMValueRef mask_result = 0; 299601e04c3fSmrg if (uses_mask) { 299701e04c3fSmrg mask_result = lp_build_mask_end(&mask); 299801e04c3fSmrg } 299901e04c3fSmrg 300001e04c3fSmrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 300101e04c3fSmrg 300201e04c3fSmrg if (uses_mask) { 300301e04c3fSmrg STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask}); 300401e04c3fSmrg } 300501e04c3fSmrg 300601e04c3fSmrg RET_VOID(); 300701e04c3fSmrg 300801e04c3fSmrg gallivm_verify_function(gallivm, wrap(pFunction)); 300901e04c3fSmrg 301001e04c3fSmrg gallivm_compile_module(gallivm); 301101e04c3fSmrg 301201e04c3fSmrg // after the gallivm passes, we have to lower the core's intrinsics 301301e04c3fSmrg llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule); 301401e04c3fSmrg lowerPass.add(createLowerX86Pass(this)); 301501e04c3fSmrg lowerPass.run(*pFunction); 301601e04c3fSmrg 301701e04c3fSmrg PFN_PIXEL_KERNEL kernel = 301801e04c3fSmrg (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction)); 301901e04c3fSmrg debug_printf("frag shader %p\n", kernel); 302001e04c3fSmrg assert(kernel && "Error: FragShader = NULL"); 302101e04c3fSmrg 302201e04c3fSmrg JM()->mIsModuleFinalized = true; 302301e04c3fSmrg 302401e04c3fSmrg return kernel; 302501e04c3fSmrg} 302601e04c3fSmrg 302701e04c3fSmrgPFN_PIXEL_KERNEL 302801e04c3fSmrgswr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key) 302901e04c3fSmrg{ 303001e04c3fSmrg if (!ctx->fs->pipe.tokens) 303101e04c3fSmrg return NULL; 303201e04c3fSmrg 303301e04c3fSmrg BuilderSWR builder( 303401e04c3fSmrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 303501e04c3fSmrg "FS"); 303601e04c3fSmrg PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key); 303701e04c3fSmrg 30387ec681f3Smrg ctx->fs->map.insert(std::make_pair(key, std::unique_ptr<VariantFS>(new VariantFS(builder.gallivm, func)))); 303901e04c3fSmrg return func; 304001e04c3fSmrg} 3041