1/* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#ifndef SI_SHADER_PRIVATE_H 26#define SI_SHADER_PRIVATE_H 27 28#include "si_shader.h" 29#include "gallivm/lp_bld_flow.h" 30#include "gallivm/lp_bld_init.h" 31#include "gallivm/lp_bld_tgsi.h" 32#include "tgsi/tgsi_parse.h" 33#include "ac_shader_abi.h" 34 35#include <llvm-c/Core.h> 36#include <llvm-c/TargetMachine.h> 37 38struct pipe_debug_callback; 39struct ac_shader_binary; 40 41#define RADEON_LLVM_MAX_INPUT_SLOTS 32 42#define RADEON_LLVM_MAX_INPUTS 32 * 4 43#define RADEON_LLVM_MAX_OUTPUTS 32 * 4 44 45#define RADEON_LLVM_MAX_SYSTEM_VALUES 11 46#define RADEON_LLVM_MAX_ADDRS 16 47 48struct si_shader_context { 49 struct lp_build_tgsi_context bld_base; 50 struct gallivm_state gallivm; 51 struct ac_llvm_context ac; 52 struct si_shader *shader; 53 struct si_screen *screen; 54 55 unsigned type; /* PIPE_SHADER_* specifies the type of shader. */ 56 57 /* For clamping the non-constant index in resource indexing: */ 58 unsigned num_const_buffers; 59 unsigned num_shader_buffers; 60 unsigned num_images; 61 unsigned num_samplers; 62 63 struct ac_shader_abi abi; 64 65 /** This function is responsible for initilizing the inputs array and will be 66 * called once for each input declared in the TGSI shader. 67 */ 68 void (*load_input)(struct si_shader_context *, 69 unsigned input_index, 70 const struct tgsi_full_declaration *decl, 71 LLVMValueRef out[4]); 72 73 /** This array contains the input values for the shader. Typically these 74 * values will be in the form of a target intrinsic that will inform the 75 * backend how to load the actual inputs to the shader. 76 */ 77 struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS]; 78 LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS]; 79 LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS]; 80 LLVMValueRef addrs[RADEON_LLVM_MAX_ADDRS][TGSI_NUM_CHANNELS]; 81 82 /** This pointer is used to contain the temporary values. 83 * The amount of temporary used in tgsi can't be bound to a max value and 84 * thus we must allocate this array at runtime. 85 */ 86 LLVMValueRef *temps; 87 unsigned temps_count; 88 LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES]; 89 90 LLVMValueRef *imms; 91 unsigned imms_num; 92 93 struct lp_build_if_state merged_wrap_if_state; 94 95 struct tgsi_array_info *temp_arrays; 96 LLVMValueRef *temp_array_allocas; 97 98 LLVMValueRef undef_alloca; 99 100 LLVMValueRef main_fn; 101 LLVMTypeRef return_type; 102 103 /* Parameter indices for LLVMGetParam. */ 104 int param_rw_buffers; 105 int param_const_and_shader_buffers; 106 int param_samplers_and_images; 107 int param_bindless_samplers_and_images; 108 /* Common inputs for merged shaders. */ 109 int param_merged_wave_info; 110 int param_merged_scratch_offset; 111 /* API VS */ 112 int param_vertex_buffers; 113 int param_rel_auto_id; 114 int param_vs_prim_id; 115 int param_vertex_index0; 116 /* VS states and layout of LS outputs / TCS inputs at the end 117 * [0] = clamp vertex color 118 * [1] = indexed 119 * [8:20] = stride between patches in DW = num_inputs * num_vertices * 4 120 * max = 32*32*4 + 32*4 121 * [24:31] = stride between vertices in DW = num_inputs * 4 122 * max = 32*4 123 */ 124 int param_vs_state_bits; 125 int param_vs_blit_inputs; 126 /* HW VS */ 127 int param_streamout_config; 128 int param_streamout_write_index; 129 int param_streamout_offset[4]; 130 131 /* API TCS & TES */ 132 /* Layout of TCS outputs in the offchip buffer 133 * # 6 bits 134 * [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40) 135 * # 6 bits 136 * [6:11] = the number of output vertices per patch, max = 32 137 * # 20 bits 138 * [12:31] = the offset of per patch attributes in the buffer in bytes. 139 * max = NUM_PATCHES*32*32*16 140 */ 141 int param_tcs_offchip_layout; 142 143 /* API TCS */ 144 /* Offsets where TCS outputs and TCS patch outputs live in LDS: 145 * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 146 * [16:31] = TCS output patch0 offset for per-patch / 16 147 * max = (NUM_PATCHES + 1) * 32*32 148 */ 149 int param_tcs_out_lds_offsets; 150 /* Layout of TCS outputs / TES inputs: 151 * [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4 152 * max = 32*32*4 + 32*4 153 * [13:18] = gl_PatchVerticesIn, max = 32 154 * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers 155 */ 156 int param_tcs_out_lds_layout; 157 int param_tcs_offchip_offset; 158 int param_tcs_factor_offset; 159 160 /* API TES */ 161 int param_tes_offchip_addr; 162 int param_tes_u; 163 int param_tes_v; 164 int param_tes_rel_patch_id; 165 /* HW ES */ 166 int param_es2gs_offset; 167 /* API GS */ 168 int param_gs2vs_offset; 169 int param_gs_wave_id; /* GFX6 */ 170 LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */ 171 int param_gs_vtx01_offset; /* in dwords (GFX9) */ 172 int param_gs_vtx23_offset; /* in dwords (GFX9) */ 173 int param_gs_vtx45_offset; /* in dwords (GFX9) */ 174 /* CS */ 175 int param_block_size; 176 int param_cs_user_data; 177 178 struct ac_llvm_compiler *compiler; 179 180 /* Preloaded descriptors. */ 181 LLVMValueRef esgs_ring; 182 LLVMValueRef gsvs_ring[4]; 183 LLVMValueRef tess_offchip_ring; 184 185 LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */ 186 LLVMValueRef gs_next_vertex[4]; 187 LLVMValueRef postponed_kill; 188 LLVMValueRef return_value; 189 190 LLVMTypeRef voidt; 191 LLVMTypeRef i1; 192 LLVMTypeRef i8; 193 LLVMTypeRef i32; 194 LLVMTypeRef i64; 195 LLVMTypeRef i128; 196 LLVMTypeRef f32; 197 LLVMTypeRef v2i32; 198 LLVMTypeRef v4i32; 199 LLVMTypeRef v4f32; 200 LLVMTypeRef v8i32; 201 202 LLVMValueRef i32_0; 203 LLVMValueRef i32_1; 204 LLVMValueRef i1false; 205 LLVMValueRef i1true; 206}; 207 208static inline struct si_shader_context * 209si_shader_context(struct lp_build_tgsi_context *bld_base) 210{ 211 return (struct si_shader_context*)bld_base; 212} 213 214static inline struct si_shader_context * 215si_shader_context_from_abi(struct ac_shader_abi *abi) 216{ 217 struct si_shader_context *ctx = NULL; 218 return container_of(abi, ctx, abi); 219} 220 221unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, 222 struct ac_llvm_compiler *compiler, 223 struct pipe_debug_callback *debug, 224 bool less_optimized); 225 226LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, 227 enum tgsi_opcode_type type); 228 229LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, 230 enum tgsi_opcode_type type, LLVMValueRef value); 231 232LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, 233 LLVMValueRef index, 234 unsigned num); 235 236void si_llvm_context_init(struct si_shader_context *ctx, 237 struct si_screen *sscreen, 238 struct ac_llvm_compiler *compiler); 239void si_llvm_context_set_tgsi(struct si_shader_context *ctx, 240 struct si_shader *shader); 241 242void si_llvm_create_func(struct si_shader_context *ctx, 243 const char *name, 244 LLVMTypeRef *return_types, unsigned num_return_elems, 245 LLVMTypeRef *ParamTypes, unsigned ParamCount); 246 247void si_llvm_dispose(struct si_shader_context *ctx); 248 249void si_llvm_optimize_module(struct si_shader_context *ctx); 250 251LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, 252 LLVMTypeRef type, 253 LLVMValueRef ptr, 254 LLVMValueRef ptr2); 255 256LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, 257 const struct tgsi_full_src_register *reg, 258 enum tgsi_opcode_type type, 259 unsigned swizzle); 260 261void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible); 262 263LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, 264 LLVMTypeRef type, 265 LLVMValueRef vertex_index, 266 LLVMValueRef param_index, 267 unsigned const_index, 268 unsigned location, 269 unsigned driver_location, 270 unsigned component, 271 unsigned num_components, 272 bool is_patch, 273 bool is_compact, 274 bool load_input); 275 276LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, 277 unsigned input_index, 278 unsigned vtx_offset_param, 279 LLVMTypeRef type, 280 unsigned swizzle); 281 282LLVMValueRef si_nir_lookup_interp_param(struct ac_shader_abi *abi, 283 enum glsl_interp_mode interp, 284 unsigned location); 285 286void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, 287 const struct tgsi_full_instruction *inst, 288 const struct tgsi_opcode_info *info, 289 unsigned index, 290 LLVMValueRef dst[4]); 291 292LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx, 293 const struct tgsi_ind_register *ind, 294 unsigned addr_mul, int rel_index); 295LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx, 296 const struct tgsi_ind_register *ind, 297 int rel_index, unsigned num); 298LLVMValueRef si_get_sample_id(struct si_shader_context *ctx); 299 300void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base); 301void si_shader_context_init_mem(struct si_shader_context *ctx); 302 303LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, 304 LLVMValueRef list, LLVMValueRef index, 305 enum ac_descriptor_type type); 306LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, 307 LLVMValueRef list, LLVMValueRef index, 308 enum ac_descriptor_type desc_type, bool dcc_off, 309 bool bindless); 310 311void si_load_system_value(struct si_shader_context *ctx, 312 unsigned index, 313 const struct tgsi_full_declaration *decl); 314void si_declare_compute_memory(struct si_shader_context *ctx); 315void si_tgsi_declare_compute_memory(struct si_shader_context *ctx, 316 const struct tgsi_full_declaration *decl); 317 318void si_llvm_load_input_vs( 319 struct si_shader_context *ctx, 320 unsigned input_index, 321 LLVMValueRef out[4]); 322void si_llvm_load_input_fs( 323 struct si_shader_context *ctx, 324 unsigned input_index, 325 LLVMValueRef out[4]); 326 327bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir); 328 329LLVMValueRef si_unpack_param(struct si_shader_context *ctx, 330 unsigned param, unsigned rshift, 331 unsigned bitwidth); 332 333#endif 334