1b8e80941Smrg/**************************************************************************** 2b8e80941Smrg * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg ***************************************************************************/ 23b8e80941Smrg 24b8e80941Smrg// llvm redefines DEBUG 25b8e80941Smrg#pragma push_macro("DEBUG") 26b8e80941Smrg#undef DEBUG 27b8e80941Smrg#include "JitManager.h" 28b8e80941Smrg#include "llvm-c/Core.h" 29b8e80941Smrg#include "llvm/Support/CBindingWrapping.h" 30b8e80941Smrg#include "llvm/IR/LegacyPassManager.h" 31b8e80941Smrg#pragma pop_macro("DEBUG") 32b8e80941Smrg 33b8e80941Smrg#include "state.h" 34b8e80941Smrg#include "gen_state_llvm.h" 35b8e80941Smrg#include "builder.h" 36b8e80941Smrg#include "functionpasses/passes.h" 37b8e80941Smrg 38b8e80941Smrg#include "tgsi/tgsi_strings.h" 39b8e80941Smrg#include "util/u_format.h" 40b8e80941Smrg#include "util/u_prim.h" 41b8e80941Smrg#include "gallivm/lp_bld_init.h" 42b8e80941Smrg#include "gallivm/lp_bld_flow.h" 43b8e80941Smrg#include "gallivm/lp_bld_struct.h" 44b8e80941Smrg#include "gallivm/lp_bld_tgsi.h" 45b8e80941Smrg 46b8e80941Smrg#include "swr_context.h" 47b8e80941Smrg#include "gen_swr_context_llvm.h" 48b8e80941Smrg#include "swr_resource.h" 49b8e80941Smrg#include "swr_state.h" 50b8e80941Smrg#include "swr_screen.h" 51b8e80941Smrg 52b8e80941Smrgusing namespace SwrJit; 53b8e80941Smrgusing namespace llvm; 54b8e80941Smrg 55b8e80941Smrgstatic unsigned 56b8e80941Smrglocate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info); 57b8e80941Smrg 58b8e80941Smrgbool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs) 59b8e80941Smrg{ 60b8e80941Smrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 61b8e80941Smrg} 62b8e80941Smrg 63b8e80941Smrgbool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs) 64b8e80941Smrg{ 65b8e80941Smrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 66b8e80941Smrg} 67b8e80941Smrg 68b8e80941Smrgbool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs) 69b8e80941Smrg{ 70b8e80941Smrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 71b8e80941Smrg} 72b8e80941Smrg 73b8e80941Smrgbool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs) 74b8e80941Smrg{ 75b8e80941Smrg return !memcmp(&lhs, &rhs, sizeof(lhs)); 76b8e80941Smrg} 77b8e80941Smrg 78b8e80941Smrgstatic void 79b8e80941Smrgswr_generate_sampler_key(const struct lp_tgsi_info &info, 80b8e80941Smrg struct swr_context *ctx, 81b8e80941Smrg enum pipe_shader_type shader_type, 82b8e80941Smrg struct swr_jit_sampler_key &key) 83b8e80941Smrg{ 84b8e80941Smrg key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1; 85b8e80941Smrg 86b8e80941Smrg for (unsigned i = 0; i < key.nr_samplers; i++) { 87b8e80941Smrg if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 88b8e80941Smrg lp_sampler_static_sampler_state( 89b8e80941Smrg &key.sampler[i].sampler_state, 90b8e80941Smrg ctx->samplers[shader_type][i]); 91b8e80941Smrg } 92b8e80941Smrg } 93b8e80941Smrg 94b8e80941Smrg /* 95b8e80941Smrg * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes 96b8e80941Smrg * are dx10-style? Can't really have mixed opcodes, at least not 97b8e80941Smrg * if we want to skip the holes here (without rescanning tgsi). 98b8e80941Smrg */ 99b8e80941Smrg if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 100b8e80941Smrg key.nr_sampler_views = 101b8e80941Smrg info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 102b8e80941Smrg for (unsigned i = 0; i < key.nr_sampler_views; i++) { 103b8e80941Smrg if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) { 104b8e80941Smrg const struct pipe_sampler_view *view = 105b8e80941Smrg ctx->sampler_views[shader_type][i]; 106b8e80941Smrg lp_sampler_static_texture_state( 107b8e80941Smrg &key.sampler[i].texture_state, view); 108b8e80941Smrg if (view) { 109b8e80941Smrg struct swr_resource *swr_res = swr_resource(view->texture); 110b8e80941Smrg const struct util_format_description *desc = 111b8e80941Smrg util_format_description(view->format); 112b8e80941Smrg if (swr_res->has_depth && swr_res->has_stencil && 113b8e80941Smrg !util_format_has_depth(desc)) 114b8e80941Smrg key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 115b8e80941Smrg } 116b8e80941Smrg } 117b8e80941Smrg } 118b8e80941Smrg } else { 119b8e80941Smrg key.nr_sampler_views = key.nr_samplers; 120b8e80941Smrg for (unsigned i = 0; i < key.nr_sampler_views; i++) { 121b8e80941Smrg if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 122b8e80941Smrg const struct pipe_sampler_view *view = 123b8e80941Smrg ctx->sampler_views[shader_type][i]; 124b8e80941Smrg lp_sampler_static_texture_state( 125b8e80941Smrg &key.sampler[i].texture_state, view); 126b8e80941Smrg if (view) { 127b8e80941Smrg struct swr_resource *swr_res = swr_resource(view->texture); 128b8e80941Smrg const struct util_format_description *desc = 129b8e80941Smrg util_format_description(view->format); 130b8e80941Smrg if (swr_res->has_depth && swr_res->has_stencil && 131b8e80941Smrg !util_format_has_depth(desc)) 132b8e80941Smrg key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 133b8e80941Smrg } 134b8e80941Smrg } 135b8e80941Smrg } 136b8e80941Smrg } 137b8e80941Smrg} 138b8e80941Smrg 139b8e80941Smrgvoid 140b8e80941Smrgswr_generate_fs_key(struct swr_jit_fs_key &key, 141b8e80941Smrg struct swr_context *ctx, 142b8e80941Smrg swr_fragment_shader *swr_fs) 143b8e80941Smrg{ 144b8e80941Smrg memset(&key, 0, sizeof(key)); 145b8e80941Smrg 146b8e80941Smrg key.nr_cbufs = ctx->framebuffer.nr_cbufs; 147b8e80941Smrg key.light_twoside = ctx->rasterizer->light_twoside; 148b8e80941Smrg key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable; 149b8e80941Smrg 150b8e80941Smrg struct tgsi_shader_info *pPrevShader; 151b8e80941Smrg if (ctx->gs) 152b8e80941Smrg pPrevShader = &ctx->gs->info.base; 153b8e80941Smrg else 154b8e80941Smrg pPrevShader = &ctx->vs->info.base; 155b8e80941Smrg 156b8e80941Smrg memcpy(&key.vs_output_semantic_name, 157b8e80941Smrg &pPrevShader->output_semantic_name, 158b8e80941Smrg sizeof(key.vs_output_semantic_name)); 159b8e80941Smrg memcpy(&key.vs_output_semantic_idx, 160b8e80941Smrg &pPrevShader->output_semantic_index, 161b8e80941Smrg sizeof(key.vs_output_semantic_idx)); 162b8e80941Smrg 163b8e80941Smrg swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key); 164b8e80941Smrg 165b8e80941Smrg key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable && 166b8e80941Smrg ctx->poly_stipple.prim_is_poly; 167b8e80941Smrg} 168b8e80941Smrg 169b8e80941Smrgvoid 170b8e80941Smrgswr_generate_vs_key(struct swr_jit_vs_key &key, 171b8e80941Smrg struct swr_context *ctx, 172b8e80941Smrg swr_vertex_shader *swr_vs) 173b8e80941Smrg{ 174b8e80941Smrg memset(&key, 0, sizeof(key)); 175b8e80941Smrg 176b8e80941Smrg key.clip_plane_mask = 177b8e80941Smrg swr_vs->info.base.clipdist_writemask ? 178b8e80941Smrg swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 179b8e80941Smrg ctx->rasterizer->clip_plane_enable; 180b8e80941Smrg 181b8e80941Smrg swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key); 182b8e80941Smrg} 183b8e80941Smrg 184b8e80941Smrgvoid 185b8e80941Smrgswr_generate_fetch_key(struct swr_jit_fetch_key &key, 186b8e80941Smrg struct swr_vertex_element_state *velems) 187b8e80941Smrg{ 188b8e80941Smrg memset(&key, 0, sizeof(key)); 189b8e80941Smrg 190b8e80941Smrg key.fsState = velems->fsState; 191b8e80941Smrg} 192b8e80941Smrg 193b8e80941Smrgvoid 194b8e80941Smrgswr_generate_gs_key(struct swr_jit_gs_key &key, 195b8e80941Smrg struct swr_context *ctx, 196b8e80941Smrg swr_geometry_shader *swr_gs) 197b8e80941Smrg{ 198b8e80941Smrg memset(&key, 0, sizeof(key)); 199b8e80941Smrg 200b8e80941Smrg struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base; 201b8e80941Smrg 202b8e80941Smrg memcpy(&key.vs_output_semantic_name, 203b8e80941Smrg &pPrevShader->output_semantic_name, 204b8e80941Smrg sizeof(key.vs_output_semantic_name)); 205b8e80941Smrg memcpy(&key.vs_output_semantic_idx, 206b8e80941Smrg &pPrevShader->output_semantic_index, 207b8e80941Smrg sizeof(key.vs_output_semantic_idx)); 208b8e80941Smrg 209b8e80941Smrg swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key); 210b8e80941Smrg} 211b8e80941Smrg 212b8e80941Smrgstruct BuilderSWR : public Builder { 213b8e80941Smrg BuilderSWR(JitManager *pJitMgr, const char *pName) 214b8e80941Smrg : Builder(pJitMgr) 215b8e80941Smrg { 216b8e80941Smrg pJitMgr->SetupNewModule(); 217b8e80941Smrg gallivm = gallivm_create(pName, wrap(&JM()->mContext)); 218b8e80941Smrg pJitMgr->mpCurrentModule = unwrap(gallivm->module); 219b8e80941Smrg } 220b8e80941Smrg 221b8e80941Smrg ~BuilderSWR() { 222b8e80941Smrg gallivm_free_ir(gallivm); 223b8e80941Smrg } 224b8e80941Smrg 225b8e80941Smrg void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, 226b8e80941Smrg unsigned slot, unsigned channel); 227b8e80941Smrg 228b8e80941Smrg struct gallivm_state *gallivm; 229b8e80941Smrg PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key); 230b8e80941Smrg PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key); 231b8e80941Smrg PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key); 232b8e80941Smrg 233b8e80941Smrg LLVMValueRef 234b8e80941Smrg swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, 235b8e80941Smrg struct lp_build_tgsi_context * bld_base, 236b8e80941Smrg boolean is_vindex_indirect, 237b8e80941Smrg LLVMValueRef vertex_index, 238b8e80941Smrg boolean is_aindex_indirect, 239b8e80941Smrg LLVMValueRef attrib_index, 240b8e80941Smrg LLVMValueRef swizzle_index); 241b8e80941Smrg void 242b8e80941Smrg swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, 243b8e80941Smrg struct lp_build_tgsi_context * bld_base, 244b8e80941Smrg LLVMValueRef (*outputs)[4], 245b8e80941Smrg LLVMValueRef emitted_vertices_vec); 246b8e80941Smrg 247b8e80941Smrg void 248b8e80941Smrg swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base, 249b8e80941Smrg struct lp_build_tgsi_context * bld_base, 250b8e80941Smrg LLVMValueRef verts_per_prim_vec, 251b8e80941Smrg LLVMValueRef emitted_prims_vec); 252b8e80941Smrg 253b8e80941Smrg void 254b8e80941Smrg swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, 255b8e80941Smrg struct lp_build_tgsi_context * bld_base, 256b8e80941Smrg LLVMValueRef total_emitted_vertices_vec, 257b8e80941Smrg LLVMValueRef emitted_prims_vec); 258b8e80941Smrg 259b8e80941Smrg}; 260b8e80941Smrg 261b8e80941Smrgstruct swr_gs_llvm_iface { 262b8e80941Smrg struct lp_build_tgsi_gs_iface base; 263b8e80941Smrg struct tgsi_shader_info *info; 264b8e80941Smrg 265b8e80941Smrg BuilderSWR *pBuilder; 266b8e80941Smrg 267b8e80941Smrg Value *pGsCtx; 268b8e80941Smrg SWR_GS_STATE *pGsState; 269b8e80941Smrg uint32_t num_outputs; 270b8e80941Smrg uint32_t num_verts_per_prim; 271b8e80941Smrg 272b8e80941Smrg Value *pVtxAttribMap; 273b8e80941Smrg}; 274b8e80941Smrg 275b8e80941Smrg// trampoline functions so we can use the builder llvm construction methods 276b8e80941Smrgstatic LLVMValueRef 277b8e80941Smrgswr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, 278b8e80941Smrg struct lp_build_tgsi_context * bld_base, 279b8e80941Smrg boolean is_vindex_indirect, 280b8e80941Smrg LLVMValueRef vertex_index, 281b8e80941Smrg boolean is_aindex_indirect, 282b8e80941Smrg LLVMValueRef attrib_index, 283b8e80941Smrg LLVMValueRef swizzle_index) 284b8e80941Smrg{ 285b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; 286b8e80941Smrg 287b8e80941Smrg return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld_base, 288b8e80941Smrg is_vindex_indirect, 289b8e80941Smrg vertex_index, 290b8e80941Smrg is_aindex_indirect, 291b8e80941Smrg attrib_index, 292b8e80941Smrg swizzle_index); 293b8e80941Smrg} 294b8e80941Smrg 295b8e80941Smrgstatic void 296b8e80941Smrgswr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, 297b8e80941Smrg struct lp_build_tgsi_context * bld_base, 298b8e80941Smrg LLVMValueRef (*outputs)[4], 299b8e80941Smrg LLVMValueRef emitted_vertices_vec) 300b8e80941Smrg{ 301b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 302b8e80941Smrg 303b8e80941Smrg iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld_base, 304b8e80941Smrg outputs, 305b8e80941Smrg emitted_vertices_vec); 306b8e80941Smrg} 307b8e80941Smrg 308b8e80941Smrgstatic void 309b8e80941Smrgswr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base, 310b8e80941Smrg struct lp_build_tgsi_context * bld_base, 311b8e80941Smrg LLVMValueRef verts_per_prim_vec, 312b8e80941Smrg LLVMValueRef emitted_prims_vec) 313b8e80941Smrg{ 314b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 315b8e80941Smrg 316b8e80941Smrg iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld_base, 317b8e80941Smrg verts_per_prim_vec, 318b8e80941Smrg emitted_prims_vec); 319b8e80941Smrg} 320b8e80941Smrg 321b8e80941Smrgstatic void 322b8e80941Smrgswr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, 323b8e80941Smrg struct lp_build_tgsi_context * bld_base, 324b8e80941Smrg LLVMValueRef total_emitted_vertices_vec, 325b8e80941Smrg LLVMValueRef emitted_prims_vec) 326b8e80941Smrg{ 327b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 328b8e80941Smrg 329b8e80941Smrg iface->pBuilder->swr_gs_llvm_epilogue(gs_base, bld_base, 330b8e80941Smrg total_emitted_vertices_vec, 331b8e80941Smrg emitted_prims_vec); 332b8e80941Smrg} 333b8e80941Smrg 334b8e80941SmrgLLVMValueRef 335b8e80941SmrgBuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, 336b8e80941Smrg struct lp_build_tgsi_context * bld_base, 337b8e80941Smrg boolean is_vindex_indirect, 338b8e80941Smrg LLVMValueRef vertex_index, 339b8e80941Smrg boolean is_aindex_indirect, 340b8e80941Smrg LLVMValueRef attrib_index, 341b8e80941Smrg LLVMValueRef swizzle_index) 342b8e80941Smrg{ 343b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; 344b8e80941Smrg Value *vert_index = unwrap(vertex_index); 345b8e80941Smrg Value *attr_index = unwrap(attrib_index); 346b8e80941Smrg 347b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 348b8e80941Smrg 349b8e80941Smrg if (is_vindex_indirect || is_aindex_indirect) { 350b8e80941Smrg int i; 351b8e80941Smrg Value *res = unwrap(bld_base->base.zero); 352b8e80941Smrg struct lp_type type = bld_base->base.type; 353b8e80941Smrg 354b8e80941Smrg for (i = 0; i < type.length; i++) { 355b8e80941Smrg Value *vert_chan_index = vert_index; 356b8e80941Smrg Value *attr_chan_index = attr_index; 357b8e80941Smrg 358b8e80941Smrg if (is_vindex_indirect) { 359b8e80941Smrg vert_chan_index = VEXTRACT(vert_index, C(i)); 360b8e80941Smrg } 361b8e80941Smrg if (is_aindex_indirect) { 362b8e80941Smrg attr_chan_index = VEXTRACT(attr_index, C(i)); 363b8e80941Smrg } 364b8e80941Smrg 365b8e80941Smrg Value *attrib = 366b8e80941Smrg LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index})); 367b8e80941Smrg 368b8e80941Smrg Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts}); 369b8e80941Smrg Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride}); 370b8e80941Smrg 371b8e80941Smrg Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib); 372b8e80941Smrg Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)})); 373b8e80941Smrg 374b8e80941Smrg Value *value = VEXTRACT(pInput, C(i)); 375b8e80941Smrg res = VINSERT(res, value, C(i)); 376b8e80941Smrg } 377b8e80941Smrg 378b8e80941Smrg return wrap(res); 379b8e80941Smrg } else { 380b8e80941Smrg Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index})); 381b8e80941Smrg 382b8e80941Smrg Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts}); 383b8e80941Smrg Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride}); 384b8e80941Smrg 385b8e80941Smrg Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib); 386b8e80941Smrg 387b8e80941Smrg Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)})); 388b8e80941Smrg 389b8e80941Smrg return wrap(pInput); 390b8e80941Smrg } 391b8e80941Smrg} 392b8e80941Smrg 393b8e80941Smrg// GS output stream layout 394b8e80941Smrg#define VERTEX_COUNT_SIZE 32 395b8e80941Smrg#define CONTROL_HEADER_SIZE (8*32) 396b8e80941Smrg 397b8e80941Smrgvoid 398b8e80941SmrgBuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, 399b8e80941Smrg struct lp_build_tgsi_context * bld_base, 400b8e80941Smrg LLVMValueRef (*outputs)[4], 401b8e80941Smrg LLVMValueRef emitted_vertices_vec) 402b8e80941Smrg{ 403b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 404b8e80941Smrg 405b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 406b8e80941Smrg 407b8e80941Smrg const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE; 408b8e80941Smrg const uint32_t attribSize = 4 * sizeof(float); 409b8e80941Smrg const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS; 410b8e80941Smrg Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize)); 411b8e80941Smrg 412b8e80941Smrg Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask}); 413b8e80941Smrg Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, mVWidth)); 414b8e80941Smrg 415b8e80941Smrg Value *pStack = STACKSAVE(); 416b8e80941Smrg Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking 417b8e80941Smrg 418b8e80941Smrg for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) { 419b8e80941Smrg uint32_t attribSlot = attrib; 420b8e80941Smrg uint32_t sgvChannel = 0; 421b8e80941Smrg if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { 422b8e80941Smrg attribSlot = VERTEX_SGV_SLOT; 423b8e80941Smrg sgvChannel = VERTEX_SGV_POINT_SIZE_COMP; 424b8e80941Smrg } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) { 425b8e80941Smrg attribSlot = VERTEX_SGV_SLOT; 426b8e80941Smrg sgvChannel = VERTEX_SGV_RTAI_COMP; 427b8e80941Smrg } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { 428b8e80941Smrg attribSlot = VERTEX_POSITION_SLOT; 429b8e80941Smrg } else { 430b8e80941Smrg attribSlot = VERTEX_ATTRIB_START_SLOT + attrib; 431b8e80941Smrg if (iface->info->writes_position) { 432b8e80941Smrg attribSlot--; 433b8e80941Smrg } 434b8e80941Smrg } 435b8e80941Smrg 436b8e80941Smrg Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ? 437b8e80941Smrg 438b8e80941Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) { 439b8e80941Smrg Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane)); 440b8e80941Smrg Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 441b8e80941Smrg Value *pStreamOffset = GEP(pStream, pLaneOffset); 442b8e80941Smrg pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy); 443b8e80941Smrg 444b8e80941Smrg Value *pLaneMask = VEXTRACT(vMask1, C(lane)); 445b8e80941Smrg pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr); 446b8e80941Smrg 447b8e80941Smrg for (uint32_t channel = 0; channel < 4; ++channel) { 448b8e80941Smrg Value *vData; 449b8e80941Smrg 450b8e80941Smrg if (attribSlot == VERTEX_SGV_SLOT) 451b8e80941Smrg vData = LOAD(unwrap(outputs[attrib][0])); 452b8e80941Smrg else 453b8e80941Smrg vData = LOAD(unwrap(outputs[attrib][channel])); 454b8e80941Smrg 455b8e80941Smrg if (attribSlot != VERTEX_SGV_SLOT || 456b8e80941Smrg sgvChannel == channel) { 457b8e80941Smrg vData = VEXTRACT(vData, C(lane)); 458b8e80941Smrg STORE(vData, pStreamOffset); 459b8e80941Smrg } 460b8e80941Smrg pStreamOffset = GEP(pStreamOffset, C(1)); 461b8e80941Smrg } 462b8e80941Smrg } 463b8e80941Smrg } 464b8e80941Smrg 465b8e80941Smrg STACKRESTORE(pStack); 466b8e80941Smrg} 467b8e80941Smrg 468b8e80941Smrgvoid 469b8e80941SmrgBuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base, 470b8e80941Smrg struct lp_build_tgsi_context * bld_base, 471b8e80941Smrg LLVMValueRef verts_per_prim_vec, 472b8e80941Smrg LLVMValueRef emitted_prims_vec) 473b8e80941Smrg{ 474b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 475b8e80941Smrg 476b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 477b8e80941Smrg 478b8e80941Smrg Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask }); 479b8e80941Smrg Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8)); 480b8e80941Smrg 481b8e80941Smrg uint32_t vertsPerPrim = iface->num_verts_per_prim; 482b8e80941Smrg 483b8e80941Smrg Value *vCount = 484b8e80941Smrg ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)), 485b8e80941Smrg unwrap(verts_per_prim_vec)); 486b8e80941Smrg 487b8e80941Smrg struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 488b8e80941Smrg vCount = LOAD(unwrap(bld->total_emitted_vertices_vec_ptr)); 489b8e80941Smrg 490b8e80941Smrg struct lp_exec_mask *exec_mask = &bld->exec_mask; 491b8e80941Smrg Value *mask = unwrap(lp_build_mask_value(bld->mask)); 492b8e80941Smrg if (exec_mask->has_mask) 493b8e80941Smrg mask = AND(mask, unwrap(exec_mask->exec_mask)); 494b8e80941Smrg 495b8e80941Smrg Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0))); 496b8e80941Smrg mask = AND(mask, cmpMask); 497b8e80941Smrg vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8)); 498b8e80941Smrg 499b8e80941Smrg vCount = SUB(vCount, VIMMED1(1)); 500b8e80941Smrg Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE)); 501b8e80941Smrg Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8))); 502b8e80941Smrg 503b8e80941Smrg vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8)); 504b8e80941Smrg 505b8e80941Smrg Value *pStack = STACKSAVE(); 506b8e80941Smrg Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking 507b8e80941Smrg 508b8e80941Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) { 509b8e80941Smrg Value *vLaneOffset = VEXTRACT(vOffset, C(lane)); 510b8e80941Smrg Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 511b8e80941Smrg Value *pStreamOffset = GEP(pStream, vLaneOffset); 512b8e80941Smrg 513b8e80941Smrg Value *pLaneMask = VEXTRACT(vMask1, C(lane)); 514b8e80941Smrg pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr); 515b8e80941Smrg 516b8e80941Smrg Value *vVal = LOAD(pStreamOffset); 517b8e80941Smrg vVal = OR(vVal, VEXTRACT(vValue, C(lane))); 518b8e80941Smrg STORE(vVal, pStreamOffset); 519b8e80941Smrg } 520b8e80941Smrg 521b8e80941Smrg STACKRESTORE(pStack); 522b8e80941Smrg} 523b8e80941Smrg 524b8e80941Smrgvoid 525b8e80941SmrgBuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, 526b8e80941Smrg struct lp_build_tgsi_context * bld_base, 527b8e80941Smrg LLVMValueRef total_emitted_vertices_vec, 528b8e80941Smrg LLVMValueRef emitted_prims_vec) 529b8e80941Smrg{ 530b8e80941Smrg swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; 531b8e80941Smrg 532b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 533b8e80941Smrg 534b8e80941Smrg // Store emit count to each output stream in the first DWORD 535b8e80941Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) 536b8e80941Smrg { 537b8e80941Smrg Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 538b8e80941Smrg pStream = BITCAST(pStream, mInt32PtrTy); 539b8e80941Smrg Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane)); 540b8e80941Smrg STORE(pLaneCount, pStream); 541b8e80941Smrg } 542b8e80941Smrg} 543b8e80941Smrg 544b8e80941SmrgPFN_GS_FUNC 545b8e80941SmrgBuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) 546b8e80941Smrg{ 547b8e80941Smrg SWR_GS_STATE *pGS = &ctx->gs->gsState; 548b8e80941Smrg struct tgsi_shader_info *info = &ctx->gs->info.base; 549b8e80941Smrg 550b8e80941Smrg memset(pGS, 0, sizeof(*pGS)); 551b8e80941Smrg 552b8e80941Smrg pGS->gsEnable = true; 553b8e80941Smrg 554b8e80941Smrg pGS->numInputAttribs = info->num_inputs; 555b8e80941Smrg pGS->outputTopology = 556b8e80941Smrg swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]); 557b8e80941Smrg pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; 558b8e80941Smrg pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS]; 559b8e80941Smrg 560b8e80941Smrg // XXX: single stream for now... 561b8e80941Smrg pGS->isSingleStream = true; 562b8e80941Smrg pGS->singleStreamID = 0; 563b8e80941Smrg 564b8e80941Smrg pGS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize 565b8e80941Smrg pGS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize 566b8e80941Smrg pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset; 567b8e80941Smrg pGS->outputVertexSize = SWR_VTX_NUM_SLOTS; 568b8e80941Smrg pGS->controlDataSize = 8; // GS ouputs max of 8 32B units 569b8e80941Smrg pGS->controlDataOffset = VERTEX_COUNT_SIZE; 570b8e80941Smrg pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE; 571b8e80941Smrg 572b8e80941Smrg pGS->allocationSize = 573b8e80941Smrg VERTEX_COUNT_SIZE + // vertex count 574b8e80941Smrg CONTROL_HEADER_SIZE + // control header 575b8e80941Smrg (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex 576b8e80941Smrg pGS->maxNumVerts; // num verts 577b8e80941Smrg 578b8e80941Smrg struct swr_geometry_shader *gs = ctx->gs; 579b8e80941Smrg 580b8e80941Smrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 581b8e80941Smrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 582b8e80941Smrg 583b8e80941Smrg memset(outputs, 0, sizeof(outputs)); 584b8e80941Smrg 585b8e80941Smrg AttrBuilder attrBuilder; 586b8e80941Smrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 587b8e80941Smrg 588b8e80941Smrg std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 589b8e80941Smrg PointerType::get(mInt8Ty, 0), 590b8e80941Smrg PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)}; 591b8e80941Smrg FunctionType *vsFuncType = 592b8e80941Smrg FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false); 593b8e80941Smrg 594b8e80941Smrg // create new vertex shader function 595b8e80941Smrg auto pFunction = Function::Create(vsFuncType, 596b8e80941Smrg GlobalValue::ExternalLinkage, 597b8e80941Smrg "GS", 598b8e80941Smrg JM()->mpCurrentModule); 599b8e80941Smrg#if HAVE_LLVM < 0x0500 600b8e80941Smrg AttributeSet attrSet = AttributeSet::get( 601b8e80941Smrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 602b8e80941Smrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 603b8e80941Smrg#else 604b8e80941Smrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 605b8e80941Smrg#endif 606b8e80941Smrg 607b8e80941Smrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 608b8e80941Smrg IRB()->SetInsertPoint(block); 609b8e80941Smrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 610b8e80941Smrg 611b8e80941Smrg auto argitr = pFunction->arg_begin(); 612b8e80941Smrg Value *hPrivateData = &*argitr++; 613b8e80941Smrg hPrivateData->setName("hPrivateData"); 614b8e80941Smrg Value *pWorkerData = &*argitr++; 615b8e80941Smrg pWorkerData->setName("pWorkerData"); 616b8e80941Smrg Value *pGsCtx = &*argitr++; 617b8e80941Smrg pGsCtx->setName("gsCtx"); 618b8e80941Smrg 619b8e80941Smrg Value *consts_ptr = 620b8e80941Smrg GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)}); 621b8e80941Smrg consts_ptr->setName("gs_constants"); 622b8e80941Smrg Value *const_sizes_ptr = 623b8e80941Smrg GEP(hPrivateData, {0, swr_draw_context_num_constantsGS}); 624b8e80941Smrg const_sizes_ptr->setName("num_gs_constants"); 625b8e80941Smrg 626b8e80941Smrg struct lp_build_sampler_soa *sampler = 627b8e80941Smrg swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY); 628b8e80941Smrg 629b8e80941Smrg struct lp_bld_tgsi_system_values system_values; 630b8e80941Smrg memset(&system_values, 0, sizeof(system_values)); 631b8e80941Smrg system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID})); 632b8e80941Smrg system_values.instance_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID})); 633b8e80941Smrg 634b8e80941Smrg std::vector<Constant*> mapConstants; 635b8e80941Smrg Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); 636b8e80941Smrg for (unsigned slot = 0; slot < info->num_inputs; slot++) { 637b8e80941Smrg ubyte semantic_name = info->input_semantic_name[slot]; 638b8e80941Smrg ubyte semantic_idx = info->input_semantic_index[slot]; 639b8e80941Smrg 640b8e80941Smrg unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); 641b8e80941Smrg 642b8e80941Smrg vs_slot += VERTEX_ATTRIB_START_SLOT; 643b8e80941Smrg 644b8e80941Smrg if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) 645b8e80941Smrg vs_slot--; 646b8e80941Smrg 647b8e80941Smrg if (semantic_name == TGSI_SEMANTIC_POSITION) 648b8e80941Smrg vs_slot = VERTEX_POSITION_SLOT; 649b8e80941Smrg 650b8e80941Smrg STORE(C(vs_slot), vtxAttribMap, {0, slot}); 651b8e80941Smrg mapConstants.push_back(C(vs_slot)); 652b8e80941Smrg } 653b8e80941Smrg 654b8e80941Smrg struct lp_build_mask_context mask; 655b8e80941Smrg Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask"); 656b8e80941Smrg lp_build_mask_begin(&mask, gallivm, 657b8e80941Smrg lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 658b8e80941Smrg 659b8e80941Smrg // zero out cut buffer so we can load/modify/store bits 660b8e80941Smrg for (uint32_t lane = 0; lane < mVWidth; ++lane) 661b8e80941Smrg { 662b8e80941Smrg Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); 663b8e80941Smrg MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH); 664b8e80941Smrg } 665b8e80941Smrg 666b8e80941Smrg struct swr_gs_llvm_iface gs_iface; 667b8e80941Smrg gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input; 668b8e80941Smrg gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex; 669b8e80941Smrg gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive; 670b8e80941Smrg gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue; 671b8e80941Smrg gs_iface.pBuilder = this; 672b8e80941Smrg gs_iface.pGsCtx = pGsCtx; 673b8e80941Smrg gs_iface.pGsState = pGS; 674b8e80941Smrg gs_iface.num_outputs = gs->info.base.num_outputs; 675b8e80941Smrg gs_iface.num_verts_per_prim = 676b8e80941Smrg u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]); 677b8e80941Smrg gs_iface.info = info; 678b8e80941Smrg gs_iface.pVtxAttribMap = vtxAttribMap; 679b8e80941Smrg 680b8e80941Smrg lp_build_tgsi_soa(gallivm, 681b8e80941Smrg gs->pipe.tokens, 682b8e80941Smrg lp_type_float_vec(32, 32 * 8), 683b8e80941Smrg &mask, 684b8e80941Smrg wrap(consts_ptr), 685b8e80941Smrg wrap(const_sizes_ptr), 686b8e80941Smrg &system_values, 687b8e80941Smrg inputs, 688b8e80941Smrg outputs, 689b8e80941Smrg wrap(hPrivateData), // (sampler context) 690b8e80941Smrg NULL, // thread data 691b8e80941Smrg sampler, 692b8e80941Smrg &gs->info.base, 693b8e80941Smrg &gs_iface.base); 694b8e80941Smrg 695b8e80941Smrg lp_build_mask_end(&mask); 696b8e80941Smrg 697b8e80941Smrg sampler->destroy(sampler); 698b8e80941Smrg 699b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 700b8e80941Smrg 701b8e80941Smrg RET_VOID(); 702b8e80941Smrg 703b8e80941Smrg gallivm_verify_function(gallivm, wrap(pFunction)); 704b8e80941Smrg gallivm_compile_module(gallivm); 705b8e80941Smrg 706b8e80941Smrg PFN_GS_FUNC pFunc = 707b8e80941Smrg (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 708b8e80941Smrg 709b8e80941Smrg debug_printf("geom shader %p\n", pFunc); 710b8e80941Smrg assert(pFunc && "Error: GeomShader = NULL"); 711b8e80941Smrg 712b8e80941Smrg JM()->mIsModuleFinalized = true; 713b8e80941Smrg 714b8e80941Smrg return pFunc; 715b8e80941Smrg} 716b8e80941Smrg 717b8e80941SmrgPFN_GS_FUNC 718b8e80941Smrgswr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key) 719b8e80941Smrg{ 720b8e80941Smrg BuilderSWR builder( 721b8e80941Smrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 722b8e80941Smrg "GS"); 723b8e80941Smrg PFN_GS_FUNC func = builder.CompileGS(ctx, key); 724b8e80941Smrg 725b8e80941Smrg ctx->gs->map.insert(std::make_pair(key, make_unique<VariantGS>(builder.gallivm, func))); 726b8e80941Smrg return func; 727b8e80941Smrg} 728b8e80941Smrg 729b8e80941Smrgvoid 730b8e80941SmrgBuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel) 731b8e80941Smrg{ 732b8e80941Smrg#if USE_SIMD16_FRONTEND && !USE_SIMD16_VS 733b8e80941Smrg // interleave the simdvertex components into the dest simd16vertex 734b8e80941Smrg // slot16offset = slot8offset * 2 735b8e80941Smrg // comp16offset = comp8offset * 2 + alternateOffset 736b8e80941Smrg 737b8e80941Smrg Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset }); 738b8e80941Smrg Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } ); 739b8e80941Smrg STORE(pVal, pOut, {channel * 2}); 740b8e80941Smrg#else 741b8e80941Smrg Value *pOut = GEP(pVtxOutput, {0, 0, slot}); 742b8e80941Smrg STORE(pVal, pOut, {0, channel}); 743b8e80941Smrg#endif 744b8e80941Smrg} 745b8e80941Smrg 746b8e80941SmrgPFN_VERTEX_FUNC 747b8e80941SmrgBuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) 748b8e80941Smrg{ 749b8e80941Smrg struct swr_vertex_shader *swr_vs = ctx->vs; 750b8e80941Smrg 751b8e80941Smrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 752b8e80941Smrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 753b8e80941Smrg 754b8e80941Smrg memset(outputs, 0, sizeof(outputs)); 755b8e80941Smrg 756b8e80941Smrg AttrBuilder attrBuilder; 757b8e80941Smrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 758b8e80941Smrg 759b8e80941Smrg std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 760b8e80941Smrg PointerType::get(mInt8Ty, 0), 761b8e80941Smrg PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; 762b8e80941Smrg FunctionType *vsFuncType = 763b8e80941Smrg FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); 764b8e80941Smrg 765b8e80941Smrg // create new vertex shader function 766b8e80941Smrg auto pFunction = Function::Create(vsFuncType, 767b8e80941Smrg GlobalValue::ExternalLinkage, 768b8e80941Smrg "VS", 769b8e80941Smrg JM()->mpCurrentModule); 770b8e80941Smrg#if HAVE_LLVM < 0x0500 771b8e80941Smrg AttributeSet attrSet = AttributeSet::get( 772b8e80941Smrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 773b8e80941Smrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 774b8e80941Smrg#else 775b8e80941Smrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 776b8e80941Smrg#endif 777b8e80941Smrg 778b8e80941Smrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 779b8e80941Smrg IRB()->SetInsertPoint(block); 780b8e80941Smrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 781b8e80941Smrg 782b8e80941Smrg auto argitr = pFunction->arg_begin(); 783b8e80941Smrg Value *hPrivateData = &*argitr++; 784b8e80941Smrg hPrivateData->setName("hPrivateData"); 785b8e80941Smrg Value *pWorkerData = &*argitr++; 786b8e80941Smrg pWorkerData->setName("pWorkerData"); 787b8e80941Smrg Value *pVsCtx = &*argitr++; 788b8e80941Smrg pVsCtx->setName("vsCtx"); 789b8e80941Smrg 790b8e80941Smrg Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)}); 791b8e80941Smrg 792b8e80941Smrg consts_ptr->setName("vs_constants"); 793b8e80941Smrg Value *const_sizes_ptr = 794b8e80941Smrg GEP(hPrivateData, {0, swr_draw_context_num_constantsVS}); 795b8e80941Smrg const_sizes_ptr->setName("num_vs_constants"); 796b8e80941Smrg 797b8e80941Smrg Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); 798b8e80941Smrg#if USE_SIMD16_VS 799b8e80941Smrg vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0)); 800b8e80941Smrg#endif 801b8e80941Smrg 802b8e80941Smrg for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 803b8e80941Smrg const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; 804b8e80941Smrg for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 805b8e80941Smrg if (mask & (1 << channel)) { 806b8e80941Smrg inputs[attrib][channel] = 807b8e80941Smrg wrap(LOAD(vtxInput, {0, 0, attrib, channel})); 808b8e80941Smrg } 809b8e80941Smrg } 810b8e80941Smrg } 811b8e80941Smrg 812b8e80941Smrg struct lp_build_sampler_soa *sampler = 813b8e80941Smrg swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX); 814b8e80941Smrg 815b8e80941Smrg struct lp_bld_tgsi_system_values system_values; 816b8e80941Smrg memset(&system_values, 0, sizeof(system_values)); 817b8e80941Smrg system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); 818b8e80941Smrg 819b8e80941Smrg#if USE_SIMD16_VS 820b8e80941Smrg system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16})); 821b8e80941Smrg#else 822b8e80941Smrg system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID})); 823b8e80941Smrg#endif 824b8e80941Smrg 825b8e80941Smrg#if USE_SIMD16_VS 826b8e80941Smrg uint32_t vectorWidth = mVWidth16; 827b8e80941Smrg#else 828b8e80941Smrg uint32_t vectorWidth = mVWidth; 829b8e80941Smrg#endif 830b8e80941Smrg 831b8e80941Smrg lp_build_tgsi_soa(gallivm, 832b8e80941Smrg swr_vs->pipe.tokens, 833b8e80941Smrg lp_type_float_vec(32, 32 * vectorWidth), 834b8e80941Smrg NULL, // mask 835b8e80941Smrg wrap(consts_ptr), 836b8e80941Smrg wrap(const_sizes_ptr), 837b8e80941Smrg &system_values, 838b8e80941Smrg inputs, 839b8e80941Smrg outputs, 840b8e80941Smrg wrap(hPrivateData), // (sampler context) 841b8e80941Smrg NULL, // thread data 842b8e80941Smrg sampler, // sampler 843b8e80941Smrg &swr_vs->info.base, 844b8e80941Smrg NULL); // geometry shader face 845b8e80941Smrg 846b8e80941Smrg sampler->destroy(sampler); 847b8e80941Smrg 848b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 849b8e80941Smrg 850b8e80941Smrg Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); 851b8e80941Smrg#if USE_SIMD16_VS 852b8e80941Smrg vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0)); 853b8e80941Smrg#endif 854b8e80941Smrg 855b8e80941Smrg for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 856b8e80941Smrg for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { 857b8e80941Smrg if (!outputs[attrib][channel]) 858b8e80941Smrg continue; 859b8e80941Smrg 860b8e80941Smrg Value *val; 861b8e80941Smrg uint32_t outSlot; 862b8e80941Smrg 863b8e80941Smrg if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { 864b8e80941Smrg if (channel != VERTEX_SGV_POINT_SIZE_COMP) 865b8e80941Smrg continue; 866b8e80941Smrg val = LOAD(unwrap(outputs[attrib][0])); 867b8e80941Smrg outSlot = VERTEX_SGV_SLOT; 868b8e80941Smrg } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { 869b8e80941Smrg val = LOAD(unwrap(outputs[attrib][channel])); 870b8e80941Smrg outSlot = VERTEX_POSITION_SLOT; 871b8e80941Smrg } else { 872b8e80941Smrg val = LOAD(unwrap(outputs[attrib][channel])); 873b8e80941Smrg outSlot = VERTEX_ATTRIB_START_SLOT + attrib; 874b8e80941Smrg if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) 875b8e80941Smrg outSlot--; 876b8e80941Smrg } 877b8e80941Smrg 878b8e80941Smrg WriteVS(val, pVsCtx, vtxOutput, outSlot, channel); 879b8e80941Smrg } 880b8e80941Smrg } 881b8e80941Smrg 882b8e80941Smrg if (ctx->rasterizer->clip_plane_enable || 883b8e80941Smrg swr_vs->info.base.culldist_writemask) { 884b8e80941Smrg unsigned clip_mask = ctx->rasterizer->clip_plane_enable; 885b8e80941Smrg 886b8e80941Smrg unsigned cv = 0; 887b8e80941Smrg if (swr_vs->info.base.writes_clipvertex) { 888b8e80941Smrg cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0, 889b8e80941Smrg &swr_vs->info.base); 890b8e80941Smrg } else { 891b8e80941Smrg for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 892b8e80941Smrg if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 893b8e80941Smrg swr_vs->info.base.output_semantic_index[i] == 0) { 894b8e80941Smrg cv = i; 895b8e80941Smrg break; 896b8e80941Smrg } 897b8e80941Smrg } 898b8e80941Smrg } 899b8e80941Smrg LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], ""); 900b8e80941Smrg LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], ""); 901b8e80941Smrg LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], ""); 902b8e80941Smrg LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], ""); 903b8e80941Smrg 904b8e80941Smrg for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) { 905b8e80941Smrg // clip distance overrides user clip planes 906b8e80941Smrg if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) || 907b8e80941Smrg ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) { 908b8e80941Smrg unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, 909b8e80941Smrg &swr_vs->info.base); 910b8e80941Smrg if (val < 4) { 911b8e80941Smrg LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], ""); 912b8e80941Smrg WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val); 913b8e80941Smrg } else { 914b8e80941Smrg LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], ""); 915b8e80941Smrg WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4); 916b8e80941Smrg } 917b8e80941Smrg continue; 918b8e80941Smrg } 919b8e80941Smrg 920b8e80941Smrg if (!(clip_mask & (1 << val))) 921b8e80941Smrg continue; 922b8e80941Smrg 923b8e80941Smrg Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0})); 924b8e80941Smrg Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1})); 925b8e80941Smrg Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2})); 926b8e80941Smrg Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3})); 927b8e80941Smrg#if USE_SIMD16_VS 928b8e80941Smrg Value *bpx = VBROADCAST_16(px); 929b8e80941Smrg Value *bpy = VBROADCAST_16(py); 930b8e80941Smrg Value *bpz = VBROADCAST_16(pz); 931b8e80941Smrg Value *bpw = VBROADCAST_16(pw); 932b8e80941Smrg#else 933b8e80941Smrg Value *bpx = VBROADCAST(px); 934b8e80941Smrg Value *bpy = VBROADCAST(py); 935b8e80941Smrg Value *bpz = VBROADCAST(pz); 936b8e80941Smrg Value *bpw = VBROADCAST(pw); 937b8e80941Smrg#endif 938b8e80941Smrg Value *dist = FADD(FMUL(unwrap(cx), bpx), 939b8e80941Smrg FADD(FMUL(unwrap(cy), bpy), 940b8e80941Smrg FADD(FMUL(unwrap(cz), bpz), 941b8e80941Smrg FMUL(unwrap(cw), bpw)))); 942b8e80941Smrg 943b8e80941Smrg if (val < 4) 944b8e80941Smrg WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val); 945b8e80941Smrg else 946b8e80941Smrg WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4); 947b8e80941Smrg } 948b8e80941Smrg } 949b8e80941Smrg 950b8e80941Smrg RET_VOID(); 951b8e80941Smrg 952b8e80941Smrg gallivm_verify_function(gallivm, wrap(pFunction)); 953b8e80941Smrg gallivm_compile_module(gallivm); 954b8e80941Smrg 955b8e80941Smrg // lp_debug_dump_value(func); 956b8e80941Smrg 957b8e80941Smrg PFN_VERTEX_FUNC pFunc = 958b8e80941Smrg (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 959b8e80941Smrg 960b8e80941Smrg debug_printf("vert shader %p\n", pFunc); 961b8e80941Smrg assert(pFunc && "Error: VertShader = NULL"); 962b8e80941Smrg 963b8e80941Smrg JM()->mIsModuleFinalized = true; 964b8e80941Smrg 965b8e80941Smrg return pFunc; 966b8e80941Smrg} 967b8e80941Smrg 968b8e80941SmrgPFN_VERTEX_FUNC 969b8e80941Smrgswr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key) 970b8e80941Smrg{ 971b8e80941Smrg if (!ctx->vs->pipe.tokens) 972b8e80941Smrg return NULL; 973b8e80941Smrg 974b8e80941Smrg BuilderSWR builder( 975b8e80941Smrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 976b8e80941Smrg "VS"); 977b8e80941Smrg PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key); 978b8e80941Smrg 979b8e80941Smrg ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func))); 980b8e80941Smrg return func; 981b8e80941Smrg} 982b8e80941Smrg 983b8e80941Smrgunsigned 984b8e80941Smrgswr_so_adjust_attrib(unsigned in_attrib, 985b8e80941Smrg swr_vertex_shader *swr_vs) 986b8e80941Smrg{ 987b8e80941Smrg ubyte semantic_name; 988b8e80941Smrg unsigned attrib; 989b8e80941Smrg 990b8e80941Smrg attrib = in_attrib + VERTEX_ATTRIB_START_SLOT; 991b8e80941Smrg 992b8e80941Smrg if (swr_vs) { 993b8e80941Smrg semantic_name = swr_vs->info.base.output_semantic_name[in_attrib]; 994b8e80941Smrg if (semantic_name == TGSI_SEMANTIC_POSITION) { 995b8e80941Smrg attrib = VERTEX_POSITION_SLOT; 996b8e80941Smrg } else if (semantic_name == TGSI_SEMANTIC_PSIZE) { 997b8e80941Smrg attrib = VERTEX_SGV_SLOT; 998b8e80941Smrg } else if (semantic_name == TGSI_SEMANTIC_LAYER) { 999b8e80941Smrg attrib = VERTEX_SGV_SLOT; 1000b8e80941Smrg } else { 1001b8e80941Smrg if (swr_vs->info.base.writes_position) { 1002b8e80941Smrg attrib--; 1003b8e80941Smrg } 1004b8e80941Smrg } 1005b8e80941Smrg } 1006b8e80941Smrg 1007b8e80941Smrg return attrib; 1008b8e80941Smrg} 1009b8e80941Smrg 1010b8e80941Smrgstatic unsigned 1011b8e80941Smrglocate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) 1012b8e80941Smrg{ 1013b8e80941Smrg for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 1014b8e80941Smrg if ((info->output_semantic_name[i] == name) 1015b8e80941Smrg && (info->output_semantic_index[i] == index)) { 1016b8e80941Smrg return i; 1017b8e80941Smrg } 1018b8e80941Smrg } 1019b8e80941Smrg 1020b8e80941Smrg return 0xFFFFFFFF; 1021b8e80941Smrg} 1022b8e80941Smrg 1023b8e80941SmrgPFN_PIXEL_KERNEL 1024b8e80941SmrgBuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) 1025b8e80941Smrg{ 1026b8e80941Smrg struct swr_fragment_shader *swr_fs = ctx->fs; 1027b8e80941Smrg 1028b8e80941Smrg struct tgsi_shader_info *pPrevShader; 1029b8e80941Smrg if (ctx->gs) 1030b8e80941Smrg pPrevShader = &ctx->gs->info.base; 1031b8e80941Smrg else 1032b8e80941Smrg pPrevShader = &ctx->vs->info.base; 1033b8e80941Smrg 1034b8e80941Smrg LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 1035b8e80941Smrg LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 1036b8e80941Smrg 1037b8e80941Smrg memset(inputs, 0, sizeof(inputs)); 1038b8e80941Smrg memset(outputs, 0, sizeof(outputs)); 1039b8e80941Smrg 1040b8e80941Smrg struct lp_build_sampler_soa *sampler = NULL; 1041b8e80941Smrg 1042b8e80941Smrg AttrBuilder attrBuilder; 1043b8e80941Smrg attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 1044b8e80941Smrg 1045b8e80941Smrg std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 1046b8e80941Smrg PointerType::get(mInt8Ty, 0), 1047b8e80941Smrg PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; 1048b8e80941Smrg FunctionType *funcType = 1049b8e80941Smrg FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); 1050b8e80941Smrg 1051b8e80941Smrg auto pFunction = Function::Create(funcType, 1052b8e80941Smrg GlobalValue::ExternalLinkage, 1053b8e80941Smrg "FS", 1054b8e80941Smrg JM()->mpCurrentModule); 1055b8e80941Smrg#if HAVE_LLVM < 0x0500 1056b8e80941Smrg AttributeSet attrSet = AttributeSet::get( 1057b8e80941Smrg JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 1058b8e80941Smrg pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 1059b8e80941Smrg#else 1060b8e80941Smrg pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder); 1061b8e80941Smrg#endif 1062b8e80941Smrg 1063b8e80941Smrg BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 1064b8e80941Smrg IRB()->SetInsertPoint(block); 1065b8e80941Smrg LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 1066b8e80941Smrg 1067b8e80941Smrg auto args = pFunction->arg_begin(); 1068b8e80941Smrg Value *hPrivateData = &*args++; 1069b8e80941Smrg hPrivateData->setName("hPrivateData"); 1070b8e80941Smrg Value *pWorkerData = &*args++; 1071b8e80941Smrg pWorkerData->setName("pWorkerData"); 1072b8e80941Smrg Value *pPS = &*args++; 1073b8e80941Smrg pPS->setName("psCtx"); 1074b8e80941Smrg 1075b8e80941Smrg Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS}); 1076b8e80941Smrg consts_ptr->setName("fs_constants"); 1077b8e80941Smrg Value *const_sizes_ptr = 1078b8e80941Smrg GEP(hPrivateData, {0, swr_draw_context_num_constantsFS}); 1079b8e80941Smrg const_sizes_ptr->setName("num_fs_constants"); 1080b8e80941Smrg 1081b8e80941Smrg // load *pAttribs, *pPerspAttribs 1082b8e80941Smrg Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs"); 1083b8e80941Smrg Value *pPerspAttribs = 1084b8e80941Smrg LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs"); 1085b8e80941Smrg 1086b8e80941Smrg swr_fs->constantMask = 0; 1087b8e80941Smrg swr_fs->flatConstantMask = 0; 1088b8e80941Smrg swr_fs->pointSpriteMask = 0; 1089b8e80941Smrg 1090b8e80941Smrg for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 1091b8e80941Smrg const unsigned mask = swr_fs->info.base.input_usage_mask[attrib]; 1092b8e80941Smrg const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib]; 1093b8e80941Smrg const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib]; 1094b8e80941Smrg 1095b8e80941Smrg if (!mask) 1096b8e80941Smrg continue; 1097b8e80941Smrg 1098b8e80941Smrg // load i,j 1099b8e80941Smrg Value *vi = nullptr, *vj = nullptr; 1100b8e80941Smrg switch (interpLoc) { 1101b8e80941Smrg case TGSI_INTERPOLATE_LOC_CENTER: 1102b8e80941Smrg vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i"); 1103b8e80941Smrg vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j"); 1104b8e80941Smrg break; 1105b8e80941Smrg case TGSI_INTERPOLATE_LOC_CENTROID: 1106b8e80941Smrg vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i"); 1107b8e80941Smrg vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j"); 1108b8e80941Smrg break; 1109b8e80941Smrg case TGSI_INTERPOLATE_LOC_SAMPLE: 1110b8e80941Smrg vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i"); 1111b8e80941Smrg vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j"); 1112b8e80941Smrg break; 1113b8e80941Smrg } 1114b8e80941Smrg 1115b8e80941Smrg // load/compute w 1116b8e80941Smrg Value *vw = nullptr, *pAttribs; 1117b8e80941Smrg if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 1118b8e80941Smrg interpMode == TGSI_INTERPOLATE_COLOR) { 1119b8e80941Smrg pAttribs = pPerspAttribs; 1120b8e80941Smrg switch (interpLoc) { 1121b8e80941Smrg case TGSI_INTERPOLATE_LOC_CENTER: 1122b8e80941Smrg vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center})); 1123b8e80941Smrg break; 1124b8e80941Smrg case TGSI_INTERPOLATE_LOC_CENTROID: 1125b8e80941Smrg vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid})); 1126b8e80941Smrg break; 1127b8e80941Smrg case TGSI_INTERPOLATE_LOC_SAMPLE: 1128b8e80941Smrg vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample})); 1129b8e80941Smrg break; 1130b8e80941Smrg } 1131b8e80941Smrg } else { 1132b8e80941Smrg pAttribs = pRawAttribs; 1133b8e80941Smrg vw = VIMMED1(1.f); 1134b8e80941Smrg } 1135b8e80941Smrg 1136b8e80941Smrg vw->setName("w"); 1137b8e80941Smrg 1138b8e80941Smrg ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib]; 1139b8e80941Smrg ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib]; 1140b8e80941Smrg 1141b8e80941Smrg if (semantic_name == TGSI_SEMANTIC_FACE) { 1142b8e80941Smrg Value *ff = 1143b8e80941Smrg UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty); 1144b8e80941Smrg ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); 1145b8e80941Smrg ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace"); 1146b8e80941Smrg 1147b8e80941Smrg inputs[attrib][0] = wrap(ff); 1148b8e80941Smrg inputs[attrib][1] = wrap(VIMMED1(0.0f)); 1149b8e80941Smrg inputs[attrib][2] = wrap(VIMMED1(0.0f)); 1150b8e80941Smrg inputs[attrib][3] = wrap(VIMMED1(1.0f)); 1151b8e80941Smrg continue; 1152b8e80941Smrg } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord 1153b8e80941Smrg if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] == 1154b8e80941Smrg TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) { 1155b8e80941Smrg inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX")); 1156b8e80941Smrg inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY")); 1157b8e80941Smrg } else { 1158b8e80941Smrg inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX")); 1159b8e80941Smrg inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY")); 1160b8e80941Smrg } 1161b8e80941Smrg inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ")); 1162b8e80941Smrg inputs[attrib][3] = 1163b8e80941Smrg wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW")); 1164b8e80941Smrg continue; 1165b8e80941Smrg } 1166b8e80941Smrg 1167b8e80941Smrg unsigned linkedAttrib = 1168b8e80941Smrg locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1; 1169b8e80941Smrg 1170b8e80941Smrg uint32_t extraAttribs = 0; 1171b8e80941Smrg if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) { 1172b8e80941Smrg /* non-gs generated primID - need to grab from swizzleMap override */ 1173b8e80941Smrg linkedAttrib = pPrevShader->num_outputs - 1; 1174b8e80941Smrg swr_fs->constantMask |= 1 << linkedAttrib; 1175b8e80941Smrg extraAttribs++; 1176b8e80941Smrg } else if (semantic_name == TGSI_SEMANTIC_GENERIC && 1177b8e80941Smrg key.sprite_coord_enable & (1 << semantic_idx)) { 1178b8e80941Smrg /* we add an extra attrib to the backendState in swr_update_derived. */ 1179b8e80941Smrg linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1; 1180b8e80941Smrg swr_fs->pointSpriteMask |= (1 << linkedAttrib); 1181b8e80941Smrg extraAttribs++; 1182b8e80941Smrg } else if (linkedAttrib == 0xFFFFFFFF) { 1183b8e80941Smrg inputs[attrib][0] = wrap(VIMMED1(0.0f)); 1184b8e80941Smrg inputs[attrib][1] = wrap(VIMMED1(0.0f)); 1185b8e80941Smrg inputs[attrib][2] = wrap(VIMMED1(0.0f)); 1186b8e80941Smrg inputs[attrib][3] = wrap(VIMMED1(1.0f)); 1187b8e80941Smrg /* If we're reading in color and 2-sided lighting is enabled, we have 1188b8e80941Smrg * to keep going. 1189b8e80941Smrg */ 1190b8e80941Smrg if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside) 1191b8e80941Smrg continue; 1192b8e80941Smrg } else { 1193b8e80941Smrg if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 1194b8e80941Smrg swr_fs->constantMask |= 1 << linkedAttrib; 1195b8e80941Smrg } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 1196b8e80941Smrg swr_fs->flatConstantMask |= 1 << linkedAttrib; 1197b8e80941Smrg } 1198b8e80941Smrg } 1199b8e80941Smrg 1200b8e80941Smrg unsigned bcolorAttrib = 0xFFFFFFFF; 1201b8e80941Smrg Value *offset = NULL; 1202b8e80941Smrg if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) { 1203b8e80941Smrg bcolorAttrib = locate_linkage( 1204b8e80941Smrg TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader) - 1; 1205b8e80941Smrg /* Neither front nor back colors were available. Nothing to load. */ 1206b8e80941Smrg if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF) 1207b8e80941Smrg continue; 1208b8e80941Smrg /* If there is no front color, just always use the back color. */ 1209b8e80941Smrg if (linkedAttrib == 0xFFFFFFFF) 1210b8e80941Smrg linkedAttrib = bcolorAttrib; 1211b8e80941Smrg 1212b8e80941Smrg if (bcolorAttrib != 0xFFFFFFFF) { 1213b8e80941Smrg if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 1214b8e80941Smrg swr_fs->constantMask |= 1 << bcolorAttrib; 1215b8e80941Smrg } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 1216b8e80941Smrg swr_fs->flatConstantMask |= 1 << bcolorAttrib; 1217b8e80941Smrg } 1218b8e80941Smrg 1219b8e80941Smrg unsigned diff = 12 * (bcolorAttrib - linkedAttrib); 1220b8e80941Smrg 1221b8e80941Smrg if (diff) { 1222b8e80941Smrg Value *back = 1223b8e80941Smrg XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace"); 1224b8e80941Smrg 1225b8e80941Smrg offset = MUL(back, C(diff)); 1226b8e80941Smrg offset->setName("offset"); 1227b8e80941Smrg } 1228b8e80941Smrg } 1229b8e80941Smrg } 1230b8e80941Smrg 1231b8e80941Smrg for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 1232b8e80941Smrg if (mask & (1 << channel)) { 1233b8e80941Smrg Value *indexA = C(linkedAttrib * 12 + channel); 1234b8e80941Smrg Value *indexB = C(linkedAttrib * 12 + channel + 4); 1235b8e80941Smrg Value *indexC = C(linkedAttrib * 12 + channel + 8); 1236b8e80941Smrg 1237b8e80941Smrg if (offset) { 1238b8e80941Smrg indexA = ADD(indexA, offset); 1239b8e80941Smrg indexB = ADD(indexB, offset); 1240b8e80941Smrg indexC = ADD(indexC, offset); 1241b8e80941Smrg } 1242b8e80941Smrg 1243b8e80941Smrg Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA))); 1244b8e80941Smrg Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB))); 1245b8e80941Smrg Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC))); 1246b8e80941Smrg 1247b8e80941Smrg if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 1248b8e80941Smrg inputs[attrib][channel] = wrap(va); 1249b8e80941Smrg } else { 1250b8e80941Smrg Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); 1251b8e80941Smrg 1252b8e80941Smrg vc = FMUL(vk, vc); 1253b8e80941Smrg 1254b8e80941Smrg Value *interp = FMUL(va, vi); 1255b8e80941Smrg Value *interp1 = FMUL(vb, vj); 1256b8e80941Smrg interp = FADD(interp, interp1); 1257b8e80941Smrg interp = FADD(interp, vc); 1258b8e80941Smrg if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 1259b8e80941Smrg interpMode == TGSI_INTERPOLATE_COLOR) 1260b8e80941Smrg interp = FMUL(interp, vw); 1261b8e80941Smrg inputs[attrib][channel] = wrap(interp); 1262b8e80941Smrg } 1263b8e80941Smrg } 1264b8e80941Smrg } 1265b8e80941Smrg } 1266b8e80941Smrg 1267b8e80941Smrg sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT); 1268b8e80941Smrg 1269b8e80941Smrg struct lp_bld_tgsi_system_values system_values; 1270b8e80941Smrg memset(&system_values, 0, sizeof(system_values)); 1271b8e80941Smrg 1272b8e80941Smrg struct lp_build_mask_context mask; 1273b8e80941Smrg bool uses_mask = false; 1274b8e80941Smrg 1275b8e80941Smrg if (swr_fs->info.base.uses_kill || 1276b8e80941Smrg key.poly_stipple_enable) { 1277b8e80941Smrg Value *vActiveMask = NULL; 1278b8e80941Smrg if (swr_fs->info.base.uses_kill) { 1279b8e80941Smrg vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask"); 1280b8e80941Smrg } 1281b8e80941Smrg if (key.poly_stipple_enable) { 1282b8e80941Smrg // first get fragment xy coords and clip to stipple bounds 1283b8e80941Smrg Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}); 1284b8e80941Smrg Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}); 1285b8e80941Smrg Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty); 1286b8e80941Smrg Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty); 1287b8e80941Smrg 1288b8e80941Smrg // stipple pattern is 32x32, which means that one line of stipple 1289b8e80941Smrg // is stored in one word: 1290b8e80941Smrg // vXstipple is bit offset inside 32-bit stipple word 1291b8e80941Smrg // vYstipple is word index is stipple array 1292b8e80941Smrg Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1) 1293b8e80941Smrg Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1) 1294b8e80941Smrg 1295b8e80941Smrg // grab stipple pattern base address 1296b8e80941Smrg Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0}); 1297b8e80941Smrg stipplePtr = BITCAST(stipplePtr, mInt8PtrTy); 1298b8e80941Smrg 1299b8e80941Smrg // peform a gather to grab stipple words for each lane 1300b8e80941Smrg Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple, 1301b8e80941Smrg VIMMED1(0xffffffff), 4); 1302b8e80941Smrg 1303b8e80941Smrg // create a mask with one bit corresponding to the x stipple 1304b8e80941Smrg // and AND it with the pattern, to see if we have a bit 1305b8e80941Smrg Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple); 1306b8e80941Smrg Value *vStippleMask = AND(vStipple, vBitMask); 1307b8e80941Smrg vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0)); 1308b8e80941Smrg vStippleMask = VMASK(vStippleMask); 1309b8e80941Smrg 1310b8e80941Smrg if (swr_fs->info.base.uses_kill) { 1311b8e80941Smrg vActiveMask = AND(vActiveMask, vStippleMask); 1312b8e80941Smrg } else { 1313b8e80941Smrg vActiveMask = vStippleMask; 1314b8e80941Smrg } 1315b8e80941Smrg } 1316b8e80941Smrg lp_build_mask_begin( 1317b8e80941Smrg &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask)); 1318b8e80941Smrg uses_mask = true; 1319b8e80941Smrg } 1320b8e80941Smrg 1321b8e80941Smrg lp_build_tgsi_soa(gallivm, 1322b8e80941Smrg swr_fs->pipe.tokens, 1323b8e80941Smrg lp_type_float_vec(32, 32 * 8), 1324b8e80941Smrg uses_mask ? &mask : NULL, // mask 1325b8e80941Smrg wrap(consts_ptr), 1326b8e80941Smrg wrap(const_sizes_ptr), 1327b8e80941Smrg &system_values, 1328b8e80941Smrg inputs, 1329b8e80941Smrg outputs, 1330b8e80941Smrg wrap(hPrivateData), 1331b8e80941Smrg NULL, // thread data 1332b8e80941Smrg sampler, // sampler 1333b8e80941Smrg &swr_fs->info.base, 1334b8e80941Smrg NULL); // geometry shader face 1335b8e80941Smrg 1336b8e80941Smrg sampler->destroy(sampler); 1337b8e80941Smrg 1338b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1339b8e80941Smrg 1340b8e80941Smrg for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs; 1341b8e80941Smrg attrib++) { 1342b8e80941Smrg switch (swr_fs->info.base.output_semantic_name[attrib]) { 1343b8e80941Smrg case TGSI_SEMANTIC_POSITION: { 1344b8e80941Smrg // write z 1345b8e80941Smrg LLVMValueRef outZ = 1346b8e80941Smrg LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); 1347b8e80941Smrg STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ}); 1348b8e80941Smrg break; 1349b8e80941Smrg } 1350b8e80941Smrg case TGSI_SEMANTIC_COLOR: { 1351b8e80941Smrg for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 1352b8e80941Smrg if (!outputs[attrib][channel]) 1353b8e80941Smrg continue; 1354b8e80941Smrg 1355b8e80941Smrg LLVMValueRef out = 1356b8e80941Smrg LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], ""); 1357b8e80941Smrg if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 1358b8e80941Smrg swr_fs->info.base.output_semantic_index[attrib] == 0) { 1359b8e80941Smrg for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { 1360b8e80941Smrg STORE(unwrap(out), 1361b8e80941Smrg pPS, 1362b8e80941Smrg {0, SWR_PS_CONTEXT_shaded, rt, channel}); 1363b8e80941Smrg } 1364b8e80941Smrg } else { 1365b8e80941Smrg STORE(unwrap(out), 1366b8e80941Smrg pPS, 1367b8e80941Smrg {0, 1368b8e80941Smrg SWR_PS_CONTEXT_shaded, 1369b8e80941Smrg swr_fs->info.base.output_semantic_index[attrib], 1370b8e80941Smrg channel}); 1371b8e80941Smrg } 1372b8e80941Smrg } 1373b8e80941Smrg break; 1374b8e80941Smrg } 1375b8e80941Smrg default: { 1376b8e80941Smrg fprintf(stderr, 1377b8e80941Smrg "unknown output from FS %s[%d]\n", 1378b8e80941Smrg tgsi_semantic_names[swr_fs->info.base 1379b8e80941Smrg .output_semantic_name[attrib]], 1380b8e80941Smrg swr_fs->info.base.output_semantic_index[attrib]); 1381b8e80941Smrg break; 1382b8e80941Smrg } 1383b8e80941Smrg } 1384b8e80941Smrg } 1385b8e80941Smrg 1386b8e80941Smrg LLVMValueRef mask_result = 0; 1387b8e80941Smrg if (uses_mask) { 1388b8e80941Smrg mask_result = lp_build_mask_end(&mask); 1389b8e80941Smrg } 1390b8e80941Smrg 1391b8e80941Smrg IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 1392b8e80941Smrg 1393b8e80941Smrg if (uses_mask) { 1394b8e80941Smrg STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask}); 1395b8e80941Smrg } 1396b8e80941Smrg 1397b8e80941Smrg RET_VOID(); 1398b8e80941Smrg 1399b8e80941Smrg gallivm_verify_function(gallivm, wrap(pFunction)); 1400b8e80941Smrg 1401b8e80941Smrg gallivm_compile_module(gallivm); 1402b8e80941Smrg 1403b8e80941Smrg // after the gallivm passes, we have to lower the core's intrinsics 1404b8e80941Smrg llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule); 1405b8e80941Smrg lowerPass.add(createLowerX86Pass(this)); 1406b8e80941Smrg lowerPass.run(*pFunction); 1407b8e80941Smrg 1408b8e80941Smrg PFN_PIXEL_KERNEL kernel = 1409b8e80941Smrg (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction)); 1410b8e80941Smrg debug_printf("frag shader %p\n", kernel); 1411b8e80941Smrg assert(kernel && "Error: FragShader = NULL"); 1412b8e80941Smrg 1413b8e80941Smrg JM()->mIsModuleFinalized = true; 1414b8e80941Smrg 1415b8e80941Smrg return kernel; 1416b8e80941Smrg} 1417b8e80941Smrg 1418b8e80941SmrgPFN_PIXEL_KERNEL 1419b8e80941Smrgswr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key) 1420b8e80941Smrg{ 1421b8e80941Smrg if (!ctx->fs->pipe.tokens) 1422b8e80941Smrg return NULL; 1423b8e80941Smrg 1424b8e80941Smrg BuilderSWR builder( 1425b8e80941Smrg reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 1426b8e80941Smrg "FS"); 1427b8e80941Smrg PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key); 1428b8e80941Smrg 1429b8e80941Smrg ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func))); 1430b8e80941Smrg return func; 1431b8e80941Smrg} 1432