1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2017 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice shall be included 12b8e80941Smrg * in all copies or substantial portions of the Software. 13b8e80941Smrg * 14b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15b8e80941Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20b8e80941Smrg * DEALINGS IN THE SOFTWARE. 21b8e80941Smrg */ 22b8e80941Smrg 23b8e80941Smrg/** 24b8e80941Smrg * @file iris_program.c 25b8e80941Smrg * 26b8e80941Smrg * This file contains the driver interface for compiling shaders. 27b8e80941Smrg * 28b8e80941Smrg * See iris_program_cache.c for the in-memory program cache where the 29b8e80941Smrg * compiled shaders are stored. 
30b8e80941Smrg */ 31b8e80941Smrg 32b8e80941Smrg#include <stdio.h> 33b8e80941Smrg#include <errno.h> 34b8e80941Smrg#include "pipe/p_defines.h" 35b8e80941Smrg#include "pipe/p_state.h" 36b8e80941Smrg#include "pipe/p_context.h" 37b8e80941Smrg#include "pipe/p_screen.h" 38b8e80941Smrg#include "util/u_atomic.h" 39b8e80941Smrg#include "compiler/nir/nir.h" 40b8e80941Smrg#include "compiler/nir/nir_builder.h" 41b8e80941Smrg#include "intel/compiler/brw_compiler.h" 42b8e80941Smrg#include "intel/compiler/brw_nir.h" 43b8e80941Smrg#include "iris_context.h" 44b8e80941Smrg#include "nir/tgsi_to_nir.h" 45b8e80941Smrg 46b8e80941Smrg#define KEY_INIT_NO_ID(gen) \ 47b8e80941Smrg .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \ 48b8e80941Smrg .tex.compressed_multisample_layout_mask = ~0, \ 49b8e80941Smrg .tex.msaa_16 = (gen >= 9 ? ~0 : 0) 50b8e80941Smrg#define KEY_INIT(gen) .program_string_id = ish->program_id, KEY_INIT_NO_ID(gen) 51b8e80941Smrg 52b8e80941Smrgstatic unsigned 53b8e80941Smrgget_new_program_id(struct iris_screen *screen) 54b8e80941Smrg{ 55b8e80941Smrg return p_atomic_inc_return(&screen->program_id); 56b8e80941Smrg} 57b8e80941Smrg 58b8e80941Smrg/** 59b8e80941Smrg * An uncompiled, API-facing shader. This is the Gallium CSO for shaders. 60b8e80941Smrg * It primarily contains the NIR for the shader. 61b8e80941Smrg * 62b8e80941Smrg * Each API-facing shader can be compiled into multiple shader variants, 63b8e80941Smrg * based on non-orthogonal state dependencies, recorded in the shader key. 64b8e80941Smrg * 65b8e80941Smrg * See iris_compiled_shader, which represents a compiled shader variant. 66b8e80941Smrg */ 67b8e80941Smrgstruct iris_uncompiled_shader { 68b8e80941Smrg nir_shader *nir; 69b8e80941Smrg 70b8e80941Smrg struct pipe_stream_output_info stream_output; 71b8e80941Smrg 72b8e80941Smrg unsigned program_id; 73b8e80941Smrg 74b8e80941Smrg /** Bitfield of (1 << IRIS_NOS_*) flags. 
*/ 75b8e80941Smrg unsigned nos; 76b8e80941Smrg 77b8e80941Smrg /** Have any shader variants been compiled yet? */ 78b8e80941Smrg bool compiled_once; 79b8e80941Smrg}; 80b8e80941Smrg 81b8e80941Smrgstatic nir_ssa_def * 82b8e80941Smrgget_aoa_deref_offset(nir_builder *b, 83b8e80941Smrg nir_deref_instr *deref, 84b8e80941Smrg unsigned elem_size) 85b8e80941Smrg{ 86b8e80941Smrg unsigned array_size = elem_size; 87b8e80941Smrg nir_ssa_def *offset = nir_imm_int(b, 0); 88b8e80941Smrg 89b8e80941Smrg while (deref->deref_type != nir_deref_type_var) { 90b8e80941Smrg assert(deref->deref_type == nir_deref_type_array); 91b8e80941Smrg 92b8e80941Smrg /* This level's element size is the previous level's array size */ 93b8e80941Smrg nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 94b8e80941Smrg assert(deref->arr.index.ssa); 95b8e80941Smrg offset = nir_iadd(b, offset, 96b8e80941Smrg nir_imul(b, index, nir_imm_int(b, array_size))); 97b8e80941Smrg 98b8e80941Smrg deref = nir_deref_instr_parent(deref); 99b8e80941Smrg assert(glsl_type_is_array(deref->type)); 100b8e80941Smrg array_size *= glsl_get_length(deref->type); 101b8e80941Smrg } 102b8e80941Smrg 103b8e80941Smrg /* Accessing an invalid surface index with the dataport can result in a 104b8e80941Smrg * hang. According to the spec "if the index used to select an individual 105b8e80941Smrg * element is negative or greater than or equal to the size of the array, 106b8e80941Smrg * the results of the operation are undefined but may not lead to 107b8e80941Smrg * termination" -- which is one of the possible outcomes of the hang. 108b8e80941Smrg * Clamp the index to prevent access outside of the array bounds. 
109b8e80941Smrg */ 110b8e80941Smrg return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size)); 111b8e80941Smrg} 112b8e80941Smrg 113b8e80941Smrgstatic void 114b8e80941Smrgiris_lower_storage_image_derefs(nir_shader *nir) 115b8e80941Smrg{ 116b8e80941Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 117b8e80941Smrg 118b8e80941Smrg nir_builder b; 119b8e80941Smrg nir_builder_init(&b, impl); 120b8e80941Smrg 121b8e80941Smrg nir_foreach_block(block, impl) { 122b8e80941Smrg nir_foreach_instr_safe(instr, block) { 123b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 124b8e80941Smrg continue; 125b8e80941Smrg 126b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 127b8e80941Smrg switch (intrin->intrinsic) { 128b8e80941Smrg case nir_intrinsic_image_deref_load: 129b8e80941Smrg case nir_intrinsic_image_deref_store: 130b8e80941Smrg case nir_intrinsic_image_deref_atomic_add: 131b8e80941Smrg case nir_intrinsic_image_deref_atomic_min: 132b8e80941Smrg case nir_intrinsic_image_deref_atomic_max: 133b8e80941Smrg case nir_intrinsic_image_deref_atomic_and: 134b8e80941Smrg case nir_intrinsic_image_deref_atomic_or: 135b8e80941Smrg case nir_intrinsic_image_deref_atomic_xor: 136b8e80941Smrg case nir_intrinsic_image_deref_atomic_exchange: 137b8e80941Smrg case nir_intrinsic_image_deref_atomic_comp_swap: 138b8e80941Smrg case nir_intrinsic_image_deref_size: 139b8e80941Smrg case nir_intrinsic_image_deref_samples: 140b8e80941Smrg case nir_intrinsic_image_deref_load_raw_intel: 141b8e80941Smrg case nir_intrinsic_image_deref_store_raw_intel: { 142b8e80941Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 143b8e80941Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 144b8e80941Smrg 145b8e80941Smrg b.cursor = nir_before_instr(&intrin->instr); 146b8e80941Smrg nir_ssa_def *index = 147b8e80941Smrg nir_iadd(&b, nir_imm_int(&b, var->data.driver_location), 148b8e80941Smrg get_aoa_deref_offset(&b, deref, 1)); 149b8e80941Smrg 
nir_rewrite_image_intrinsic(intrin, index, false); 150b8e80941Smrg break; 151b8e80941Smrg } 152b8e80941Smrg 153b8e80941Smrg default: 154b8e80941Smrg break; 155b8e80941Smrg } 156b8e80941Smrg } 157b8e80941Smrg } 158b8e80941Smrg} 159b8e80941Smrg 160b8e80941Smrg// XXX: need unify_interfaces() at link time... 161b8e80941Smrg 162b8e80941Smrg/** 163b8e80941Smrg * Fix an uncompiled shader's stream output info. 164b8e80941Smrg * 165b8e80941Smrg * Core Gallium stores output->register_index as a "slot" number, where 166b8e80941Smrg * slots are assigned consecutively to all outputs in info->outputs_written. 167b8e80941Smrg * This naive packing of outputs doesn't work for us - we too have slots, 168b8e80941Smrg * but the layout is defined by the VUE map, which we won't have until we 169b8e80941Smrg * compile a specific shader variant. So, we remap these and simply store 170b8e80941Smrg * VARYING_SLOT_* in our copy's output->register_index fields. 171b8e80941Smrg * 172b8e80941Smrg * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W 173b8e80941Smrg * components of our VUE header. See brw_vue_map.c for the layout. 
174b8e80941Smrg */ 175b8e80941Smrgstatic void 176b8e80941Smrgupdate_so_info(struct pipe_stream_output_info *so_info, 177b8e80941Smrg uint64_t outputs_written) 178b8e80941Smrg{ 179b8e80941Smrg uint8_t reverse_map[64] = {}; 180b8e80941Smrg unsigned slot = 0; 181b8e80941Smrg while (outputs_written) { 182b8e80941Smrg reverse_map[slot++] = u_bit_scan64(&outputs_written); 183b8e80941Smrg } 184b8e80941Smrg 185b8e80941Smrg for (unsigned i = 0; i < so_info->num_outputs; i++) { 186b8e80941Smrg struct pipe_stream_output *output = &so_info->output[i]; 187b8e80941Smrg 188b8e80941Smrg /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ 189b8e80941Smrg output->register_index = reverse_map[output->register_index]; 190b8e80941Smrg 191b8e80941Smrg /* The VUE header contains three scalar fields packed together: 192b8e80941Smrg * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w 193b8e80941Smrg * - gl_Layer is stored in VARYING_SLOT_PSIZ.y 194b8e80941Smrg * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z 195b8e80941Smrg */ 196b8e80941Smrg switch (output->register_index) { 197b8e80941Smrg case VARYING_SLOT_LAYER: 198b8e80941Smrg assert(output->num_components == 1); 199b8e80941Smrg output->register_index = VARYING_SLOT_PSIZ; 200b8e80941Smrg output->start_component = 1; 201b8e80941Smrg break; 202b8e80941Smrg case VARYING_SLOT_VIEWPORT: 203b8e80941Smrg assert(output->num_components == 1); 204b8e80941Smrg output->register_index = VARYING_SLOT_PSIZ; 205b8e80941Smrg output->start_component = 2; 206b8e80941Smrg break; 207b8e80941Smrg case VARYING_SLOT_PSIZ: 208b8e80941Smrg assert(output->num_components == 1); 209b8e80941Smrg output->start_component = 3; 210b8e80941Smrg break; 211b8e80941Smrg } 212b8e80941Smrg 213b8e80941Smrg //info->outputs_written |= 1ull << output->register_index; 214b8e80941Smrg } 215b8e80941Smrg} 216b8e80941Smrg 217b8e80941Smrg/** 218b8e80941Smrg * Sets up the starting offsets for the groups of binding table entries 219b8e80941Smrg * common to all 
pipeline stages. 220b8e80941Smrg * 221b8e80941Smrg * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're 222b8e80941Smrg * unused but also make sure that addition of small offsets to them will 223b8e80941Smrg * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. 224b8e80941Smrg */ 225b8e80941Smrgstatic uint32_t 226b8e80941Smrgassign_common_binding_table_offsets(const struct gen_device_info *devinfo, 227b8e80941Smrg const struct nir_shader *nir, 228b8e80941Smrg struct brw_stage_prog_data *prog_data, 229b8e80941Smrg uint32_t next_binding_table_offset, 230b8e80941Smrg unsigned num_system_values, 231b8e80941Smrg unsigned num_cbufs) 232b8e80941Smrg{ 233b8e80941Smrg const struct shader_info *info = &nir->info; 234b8e80941Smrg 235b8e80941Smrg unsigned num_textures = util_last_bit(info->textures_used); 236b8e80941Smrg 237b8e80941Smrg if (num_textures) { 238b8e80941Smrg prog_data->binding_table.texture_start = next_binding_table_offset; 239b8e80941Smrg prog_data->binding_table.gather_texture_start = next_binding_table_offset; 240b8e80941Smrg next_binding_table_offset += num_textures; 241b8e80941Smrg } else { 242b8e80941Smrg prog_data->binding_table.texture_start = 0xd0d0d0d0; 243b8e80941Smrg prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; 244b8e80941Smrg } 245b8e80941Smrg 246b8e80941Smrg if (info->num_images) { 247b8e80941Smrg prog_data->binding_table.image_start = next_binding_table_offset; 248b8e80941Smrg next_binding_table_offset += info->num_images; 249b8e80941Smrg } else { 250b8e80941Smrg prog_data->binding_table.image_start = 0xd0d0d0d0; 251b8e80941Smrg } 252b8e80941Smrg 253b8e80941Smrg if (num_cbufs) { 254b8e80941Smrg //assert(info->num_ubos <= BRW_MAX_UBO); 255b8e80941Smrg prog_data->binding_table.ubo_start = next_binding_table_offset; 256b8e80941Smrg next_binding_table_offset += num_cbufs; 257b8e80941Smrg } else { 258b8e80941Smrg prog_data->binding_table.ubo_start = 0xd0d0d0d0; 259b8e80941Smrg } 
260b8e80941Smrg 261b8e80941Smrg if (info->num_ssbos || info->num_abos) { 262b8e80941Smrg prog_data->binding_table.ssbo_start = next_binding_table_offset; 263b8e80941Smrg // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment 264b8e80941Smrg next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos; 265b8e80941Smrg } else { 266b8e80941Smrg prog_data->binding_table.ssbo_start = 0xd0d0d0d0; 267b8e80941Smrg } 268b8e80941Smrg 269b8e80941Smrg prog_data->binding_table.shader_time_start = 0xd0d0d0d0; 270b8e80941Smrg 271b8e80941Smrg /* Plane 0 is just the regular texture section */ 272b8e80941Smrg prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start; 273b8e80941Smrg 274b8e80941Smrg prog_data->binding_table.plane_start[1] = next_binding_table_offset; 275b8e80941Smrg next_binding_table_offset += num_textures; 276b8e80941Smrg 277b8e80941Smrg prog_data->binding_table.plane_start[2] = next_binding_table_offset; 278b8e80941Smrg next_binding_table_offset += num_textures; 279b8e80941Smrg 280b8e80941Smrg /* Set the binding table size */ 281b8e80941Smrg prog_data->binding_table.size_bytes = next_binding_table_offset * 4; 282b8e80941Smrg 283b8e80941Smrg return next_binding_table_offset; 284b8e80941Smrg} 285b8e80941Smrg 286b8e80941Smrgstatic void 287b8e80941Smrgsetup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx, 288b8e80941Smrg unsigned offset, unsigned n) 289b8e80941Smrg{ 290b8e80941Smrg assert(offset % sizeof(uint32_t) == 0); 291b8e80941Smrg 292b8e80941Smrg for (unsigned i = 0; i < n; ++i) 293b8e80941Smrg sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); 294b8e80941Smrg 295b8e80941Smrg for (unsigned i = n; i < 4; ++i) 296b8e80941Smrg sysvals[i] = BRW_PARAM_BUILTIN_ZERO; 297b8e80941Smrg} 298b8e80941Smrg 299b8e80941Smrg/** 300b8e80941Smrg * Associate NIR uniform variables with the prog_data->param[] mechanism 301b8e80941Smrg * used by the backend. 
/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 *
 * System value intrinsics (clip planes, patch vertex counts, pre-Gen9
 * image params) are rewritten into load_ubo intrinsics reading from the
 * front of constant buffer 0; the corresponding brw_param_builtin tokens
 * are returned in *out_system_values for upload at draw time.
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct gen_device_info *devinfo = compiler->devinfo;

   /* The intel compiler assumes that num_uniforms is in bytes. For
    * scalar that means 4 bytes per uniform slot.
    *
    * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes.
    */
   nir->num_uniforms *= 4;

   /* Worst case: every image on the stage needs a full set of params. */
   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   /* -1 (as unsigned) marks "no slot assigned yet" for each sysval kind. */
   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   /* Placeholder SSA value standing in for "constant buffer 0"; replaced
    * with a real immediate in the second pass below.
    */
   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            /* Assign four consecutive sysval slots per clip plane, once. */
            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            /* Image params are only needed pre-Gen9 (no typed surface
             * reads for all formats there).
             */
            assert(devinfo->gen < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            /* XXX: var->data.binding is not set properly.  We need to run
             * some form of gl_nir_lower_samplers_as_deref() to get it.
             * This breaks tests which use more than one image.
             */
            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               /* Reserve BRW_IMAGE_PARAM_SIZE sysval slots per image and
                * fill them with pointers into brw_image_param fields.
                */
               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         default:
            continue;
         }

         /* Replace the matched intrinsic with a load_ubo from the
          * placeholder buffer at the computed offset.
          */
         unsigned comps = nir_intrinsic_dest_components(intrin);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Place the new params at the front of constant buffer 0. */
   if (num_system_values > 0) {
      nir->num_uniforms += num_system_values * sizeof(uint32_t);

      /* Shrink the over-allocated sysval array to its actual size. */
      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               /* Our placeholder loads: point them at real UBO 0. */
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
            } else if (nir_src_is_const(load->src[0]) &&
                       nir_src_as_uint(load->src[0]) == 0) {
               /* Pre-existing UBO 0 loads: shift past the new sysvals. */
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
               nir_instr_rewrite_src(instr, &load->src[1],
                                     nir_src_for_ssa(offset));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   nir_validate_shader(nir, "after remap");

   if (nir->info.stage != MESA_SHADER_COMPUTE)
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   /* We don't use params[], but fs_visitor::nir_setup_uniforms() asserts
    * about it for compute shaders, so go ahead and make some fake ones
    * which the backend will dead code eliminate.
    */
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   /* System values and uniforms are stored in constant buffer 0, the
    * user-facing UBOs are indexed by one.  So if any constant buffer is
    * needed, the constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || num_system_values || nir->num_uniforms)
      num_cbufs++;

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}

/**
 * Log a perf warning when a shader is recompiled, including a diff of the
 * old and new keys (via brw_debug_key_recompile).  No-op if info is NULL.
 */
static void
iris_debug_recompile(struct iris_context *ice,
                     struct shader_info *info,
                     unsigned program_string_id,
                     const void *key)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   const struct brw_compiler *c = screen->compiler;

   if (!info)
      return;

   c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
                      _mesa_shader_stage_to_string(info->stage),
                      info->name ? info->name : "(no identifier)",
                      info->label ? info->label : "");

   const void *old_key =
      iris_find_previous_compile(ice, info->stage, program_string_id);

   brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key);
}
540b8e80941Smrg */ 541b8e80941Smrgstatic struct iris_compiled_shader * 542b8e80941Smrgiris_compile_vs(struct iris_context *ice, 543b8e80941Smrg struct iris_uncompiled_shader *ish, 544b8e80941Smrg const struct brw_vs_prog_key *key) 545b8e80941Smrg{ 546b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 547b8e80941Smrg const struct brw_compiler *compiler = screen->compiler; 548b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 549b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 550b8e80941Smrg struct brw_vs_prog_data *vs_prog_data = 551b8e80941Smrg rzalloc(mem_ctx, struct brw_vs_prog_data); 552b8e80941Smrg struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base; 553b8e80941Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 554b8e80941Smrg enum brw_param_builtin *system_values; 555b8e80941Smrg unsigned num_system_values; 556b8e80941Smrg unsigned num_cbufs; 557b8e80941Smrg 558b8e80941Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 559b8e80941Smrg 560b8e80941Smrg if (key->nr_userclip_plane_consts) { 561b8e80941Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 562b8e80941Smrg nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true); 563b8e80941Smrg nir_lower_io_to_temporaries(nir, impl, true, false); 564b8e80941Smrg nir_lower_global_vars_to_local(nir); 565b8e80941Smrg nir_lower_vars_to_ssa(nir); 566b8e80941Smrg nir_shader_gather_info(nir, impl); 567b8e80941Smrg } 568b8e80941Smrg 569b8e80941Smrg if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0) 570b8e80941Smrg prog_data->use_alt_mode = true; 571b8e80941Smrg 572b8e80941Smrg iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 573b8e80941Smrg &num_system_values, &num_cbufs); 574b8e80941Smrg 575b8e80941Smrg assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, 576b8e80941Smrg num_system_values, num_cbufs); 577b8e80941Smrg 578b8e80941Smrg brw_compute_vue_map(devinfo, 
579b8e80941Smrg &vue_prog_data->vue_map, nir->info.outputs_written, 580b8e80941Smrg nir->info.separate_shader); 581b8e80941Smrg 582b8e80941Smrg /* Don't tell the backend about our clip plane constants, we've already 583b8e80941Smrg * lowered them in NIR and we don't want it doing it again. 584b8e80941Smrg */ 585b8e80941Smrg struct brw_vs_prog_key key_no_ucp = *key; 586b8e80941Smrg key_no_ucp.nr_userclip_plane_consts = 0; 587b8e80941Smrg 588b8e80941Smrg char *error_str = NULL; 589b8e80941Smrg const unsigned *program = 590b8e80941Smrg brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data, 591b8e80941Smrg nir, -1, &error_str); 592b8e80941Smrg if (program == NULL) { 593b8e80941Smrg dbg_printf("Failed to compile vertex shader: %s\n", error_str); 594b8e80941Smrg ralloc_free(mem_ctx); 595b8e80941Smrg return false; 596b8e80941Smrg } 597b8e80941Smrg 598b8e80941Smrg if (ish->compiled_once) { 599b8e80941Smrg iris_debug_recompile(ice, &nir->info, key->program_string_id, key); 600b8e80941Smrg } else { 601b8e80941Smrg ish->compiled_once = true; 602b8e80941Smrg } 603b8e80941Smrg 604b8e80941Smrg uint32_t *so_decls = 605b8e80941Smrg ice->vtbl.create_so_decl_list(&ish->stream_output, 606b8e80941Smrg &vue_prog_data->vue_map); 607b8e80941Smrg 608b8e80941Smrg struct iris_compiled_shader *shader = 609b8e80941Smrg iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program, 610b8e80941Smrg prog_data, so_decls, system_values, num_system_values, 611b8e80941Smrg num_cbufs); 612b8e80941Smrg 613b8e80941Smrg ralloc_free(mem_ctx); 614b8e80941Smrg return shader; 615b8e80941Smrg} 616b8e80941Smrg 617b8e80941Smrg/** 618b8e80941Smrg * Update the current vertex shader variant. 619b8e80941Smrg * 620b8e80941Smrg * Fill out the key, look in the cache, compile and bind if needed. 
621b8e80941Smrg */ 622b8e80941Smrgstatic void 623b8e80941Smrgiris_update_compiled_vs(struct iris_context *ice) 624b8e80941Smrg{ 625b8e80941Smrg struct iris_uncompiled_shader *ish = 626b8e80941Smrg ice->shaders.uncompiled[MESA_SHADER_VERTEX]; 627b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 628b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 629b8e80941Smrg 630b8e80941Smrg struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) }; 631b8e80941Smrg ice->vtbl.populate_vs_key(ice, &ish->nir->info, &key); 632b8e80941Smrg 633b8e80941Smrg struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS]; 634b8e80941Smrg struct iris_compiled_shader *shader = 635b8e80941Smrg iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key); 636b8e80941Smrg 637b8e80941Smrg if (!shader) 638b8e80941Smrg shader = iris_compile_vs(ice, ish, &key); 639b8e80941Smrg 640b8e80941Smrg if (old != shader) { 641b8e80941Smrg ice->shaders.prog[IRIS_CACHE_VS] = shader; 642b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_VS | 643b8e80941Smrg IRIS_DIRTY_BINDINGS_VS | 644b8e80941Smrg IRIS_DIRTY_CONSTANTS_VS | 645b8e80941Smrg IRIS_DIRTY_VF_SGVS; 646b8e80941Smrg const struct brw_vs_prog_data *vs_prog_data = 647b8e80941Smrg (void *) shader->prog_data; 648b8e80941Smrg const bool uses_draw_params = vs_prog_data->uses_firstvertex || 649b8e80941Smrg vs_prog_data->uses_baseinstance; 650b8e80941Smrg const bool uses_derived_draw_params = vs_prog_data->uses_drawid || 651b8e80941Smrg vs_prog_data->uses_is_indexed_draw; 652b8e80941Smrg const bool needs_sgvs_element = uses_draw_params || 653b8e80941Smrg vs_prog_data->uses_instanceid || 654b8e80941Smrg vs_prog_data->uses_vertexid; 655b8e80941Smrg bool needs_edge_flag = false; 656b8e80941Smrg nir_foreach_variable(var, &ish->nir->inputs) { 657b8e80941Smrg if (var->data.location == VERT_ATTRIB_EDGEFLAG) 658b8e80941Smrg needs_edge_flag = true; 659b8e80941Smrg } 660b8e80941Smrg 661b8e80941Smrg if 
(ice->state.vs_uses_draw_params != uses_draw_params || 662b8e80941Smrg ice->state.vs_uses_derived_draw_params != uses_derived_draw_params || 663b8e80941Smrg ice->state.vs_needs_edge_flag != needs_edge_flag) { 664b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS | 665b8e80941Smrg IRIS_DIRTY_VERTEX_ELEMENTS; 666b8e80941Smrg } 667b8e80941Smrg ice->state.vs_uses_draw_params = uses_draw_params; 668b8e80941Smrg ice->state.vs_uses_derived_draw_params = uses_derived_draw_params; 669b8e80941Smrg ice->state.vs_needs_sgvs_element = needs_sgvs_element; 670b8e80941Smrg ice->state.vs_needs_edge_flag = needs_edge_flag; 671b8e80941Smrg } 672b8e80941Smrg} 673b8e80941Smrg 674b8e80941Smrg/** 675b8e80941Smrg * Get the shader_info for a given stage, or NULL if the stage is disabled. 676b8e80941Smrg */ 677b8e80941Smrgconst struct shader_info * 678b8e80941Smrgiris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage) 679b8e80941Smrg{ 680b8e80941Smrg const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage]; 681b8e80941Smrg 682b8e80941Smrg if (!ish) 683b8e80941Smrg return NULL; 684b8e80941Smrg 685b8e80941Smrg const nir_shader *nir = ish->nir; 686b8e80941Smrg return &nir->info; 687b8e80941Smrg} 688b8e80941Smrg 689b8e80941Smrg/** 690b8e80941Smrg * Get the union of TCS output and TES input slots. 691b8e80941Smrg * 692b8e80941Smrg * TCS and TES need to agree on a common URB entry layout. In particular, 693b8e80941Smrg * the data for all patch vertices is stored in a single URB entry (unlike 694b8e80941Smrg * GS which has one entry per input vertex). This means that per-vertex 695b8e80941Smrg * array indexing needs a stride. 696b8e80941Smrg * 697b8e80941Smrg * SSO requires locations to match, but doesn't require the number of 698b8e80941Smrg * outputs/inputs to match (in fact, the TCS often has extra outputs). 699b8e80941Smrg * So, we need to take the extra step of unifying these on the fly. 
700b8e80941Smrg */ 701b8e80941Smrgstatic void 702b8e80941Smrgget_unified_tess_slots(const struct iris_context *ice, 703b8e80941Smrg uint64_t *per_vertex_slots, 704b8e80941Smrg uint32_t *per_patch_slots) 705b8e80941Smrg{ 706b8e80941Smrg const struct shader_info *tcs = 707b8e80941Smrg iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL); 708b8e80941Smrg const struct shader_info *tes = 709b8e80941Smrg iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL); 710b8e80941Smrg 711b8e80941Smrg *per_vertex_slots = tes->inputs_read; 712b8e80941Smrg *per_patch_slots = tes->patch_inputs_read; 713b8e80941Smrg 714b8e80941Smrg if (tcs) { 715b8e80941Smrg *per_vertex_slots |= tcs->outputs_written; 716b8e80941Smrg *per_patch_slots |= tcs->patch_outputs_written; 717b8e80941Smrg } 718b8e80941Smrg} 719b8e80941Smrg 720b8e80941Smrg/** 721b8e80941Smrg * Compile a tessellation control shader, and upload the assembly. 722b8e80941Smrg */ 723b8e80941Smrgstatic struct iris_compiled_shader * 724b8e80941Smrgiris_compile_tcs(struct iris_context *ice, 725b8e80941Smrg struct iris_uncompiled_shader *ish, 726b8e80941Smrg const struct brw_tcs_prog_key *key) 727b8e80941Smrg{ 728b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 729b8e80941Smrg const struct brw_compiler *compiler = screen->compiler; 730b8e80941Smrg const struct nir_shader_compiler_options *options = 731b8e80941Smrg compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions; 732b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 733b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 734b8e80941Smrg struct brw_tcs_prog_data *tcs_prog_data = 735b8e80941Smrg rzalloc(mem_ctx, struct brw_tcs_prog_data); 736b8e80941Smrg struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; 737b8e80941Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 738b8e80941Smrg enum brw_param_builtin *system_values = NULL; 739b8e80941Smrg unsigned num_system_values = 0; 740b8e80941Smrg unsigned 
num_cbufs = 0; 741b8e80941Smrg 742b8e80941Smrg nir_shader *nir; 743b8e80941Smrg 744b8e80941Smrg if (ish) { 745b8e80941Smrg nir = nir_shader_clone(mem_ctx, ish->nir); 746b8e80941Smrg 747b8e80941Smrg iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 748b8e80941Smrg &num_system_values, &num_cbufs); 749b8e80941Smrg assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, 750b8e80941Smrg num_system_values, num_cbufs); 751b8e80941Smrg } else { 752b8e80941Smrg nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key); 753b8e80941Smrg 754b8e80941Smrg /* Reserve space for passing the default tess levels as constants. */ 755b8e80941Smrg num_system_values = 8; 756b8e80941Smrg system_values = 757b8e80941Smrg rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values); 758b8e80941Smrg prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values); 759b8e80941Smrg prog_data->nr_params = num_system_values; 760b8e80941Smrg 761b8e80941Smrg if (key->tes_primitive_mode == GL_QUADS) { 762b8e80941Smrg for (int i = 0; i < 4; i++) 763b8e80941Smrg system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; 764b8e80941Smrg 765b8e80941Smrg system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; 766b8e80941Smrg system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y; 767b8e80941Smrg } else if (key->tes_primitive_mode == GL_TRIANGLES) { 768b8e80941Smrg for (int i = 0; i < 3; i++) 769b8e80941Smrg system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; 770b8e80941Smrg 771b8e80941Smrg system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; 772b8e80941Smrg } else { 773b8e80941Smrg assert(key->tes_primitive_mode == GL_ISOLINES); 774b8e80941Smrg system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y; 775b8e80941Smrg system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; 776b8e80941Smrg } 777b8e80941Smrg 778b8e80941Smrg prog_data->ubo_ranges[0].length = 1; 779b8e80941Smrg } 780b8e80941Smrg 781b8e80941Smrg char *error_str = 
NULL; 782b8e80941Smrg const unsigned *program = 783b8e80941Smrg brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir, 784b8e80941Smrg -1, &error_str); 785b8e80941Smrg if (program == NULL) { 786b8e80941Smrg dbg_printf("Failed to compile control shader: %s\n", error_str); 787b8e80941Smrg ralloc_free(mem_ctx); 788b8e80941Smrg return false; 789b8e80941Smrg } 790b8e80941Smrg 791b8e80941Smrg if (ish) { 792b8e80941Smrg if (ish->compiled_once) { 793b8e80941Smrg iris_debug_recompile(ice, &nir->info, key->program_string_id, key); 794b8e80941Smrg } else { 795b8e80941Smrg ish->compiled_once = true; 796b8e80941Smrg } 797b8e80941Smrg } 798b8e80941Smrg 799b8e80941Smrg struct iris_compiled_shader *shader = 800b8e80941Smrg iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program, 801b8e80941Smrg prog_data, NULL, system_values, num_system_values, 802b8e80941Smrg num_cbufs); 803b8e80941Smrg 804b8e80941Smrg ralloc_free(mem_ctx); 805b8e80941Smrg return shader; 806b8e80941Smrg} 807b8e80941Smrg 808b8e80941Smrg/** 809b8e80941Smrg * Update the current tessellation control shader variant. 810b8e80941Smrg * 811b8e80941Smrg * Fill out the key, look in the cache, compile and bind if needed. 812b8e80941Smrg */ 813b8e80941Smrgstatic void 814b8e80941Smrgiris_update_compiled_tcs(struct iris_context *ice) 815b8e80941Smrg{ 816b8e80941Smrg struct iris_uncompiled_shader *tcs = 817b8e80941Smrg ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]; 818b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 819b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 820b8e80941Smrg 821b8e80941Smrg const struct shader_info *tes_info = 822b8e80941Smrg iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL); 823b8e80941Smrg struct brw_tcs_prog_key key = { 824b8e80941Smrg KEY_INIT_NO_ID(devinfo->gen), 825b8e80941Smrg .program_string_id = tcs ? 
tcs->program_id : 0, 826b8e80941Smrg .tes_primitive_mode = tes_info->tess.primitive_mode, 827b8e80941Smrg .input_vertices = ice->state.vertices_per_patch, 828b8e80941Smrg }; 829b8e80941Smrg get_unified_tess_slots(ice, &key.outputs_written, 830b8e80941Smrg &key.patch_outputs_written); 831b8e80941Smrg ice->vtbl.populate_tcs_key(ice, &key); 832b8e80941Smrg 833b8e80941Smrg struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS]; 834b8e80941Smrg struct iris_compiled_shader *shader = 835b8e80941Smrg iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key); 836b8e80941Smrg 837b8e80941Smrg if (!shader) 838b8e80941Smrg shader = iris_compile_tcs(ice, tcs, &key); 839b8e80941Smrg 840b8e80941Smrg if (old != shader) { 841b8e80941Smrg ice->shaders.prog[IRIS_CACHE_TCS] = shader; 842b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_TCS | 843b8e80941Smrg IRIS_DIRTY_BINDINGS_TCS | 844b8e80941Smrg IRIS_DIRTY_CONSTANTS_TCS; 845b8e80941Smrg } 846b8e80941Smrg} 847b8e80941Smrg 848b8e80941Smrg/** 849b8e80941Smrg * Compile a tessellation evaluation shader, and upload the assembly. 
850b8e80941Smrg */ 851b8e80941Smrgstatic struct iris_compiled_shader * 852b8e80941Smrgiris_compile_tes(struct iris_context *ice, 853b8e80941Smrg struct iris_uncompiled_shader *ish, 854b8e80941Smrg const struct brw_tes_prog_key *key) 855b8e80941Smrg{ 856b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 857b8e80941Smrg const struct brw_compiler *compiler = screen->compiler; 858b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 859b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 860b8e80941Smrg struct brw_tes_prog_data *tes_prog_data = 861b8e80941Smrg rzalloc(mem_ctx, struct brw_tes_prog_data); 862b8e80941Smrg struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base; 863b8e80941Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 864b8e80941Smrg enum brw_param_builtin *system_values; 865b8e80941Smrg unsigned num_system_values; 866b8e80941Smrg unsigned num_cbufs; 867b8e80941Smrg 868b8e80941Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 869b8e80941Smrg 870b8e80941Smrg iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 871b8e80941Smrg &num_system_values, &num_cbufs); 872b8e80941Smrg 873b8e80941Smrg assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, 874b8e80941Smrg num_system_values, num_cbufs); 875b8e80941Smrg 876b8e80941Smrg struct brw_vue_map input_vue_map; 877b8e80941Smrg brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, 878b8e80941Smrg key->patch_inputs_read); 879b8e80941Smrg 880b8e80941Smrg char *error_str = NULL; 881b8e80941Smrg const unsigned *program = 882b8e80941Smrg brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map, 883b8e80941Smrg tes_prog_data, nir, NULL, -1, &error_str); 884b8e80941Smrg if (program == NULL) { 885b8e80941Smrg dbg_printf("Failed to compile evaluation shader: %s\n", error_str); 886b8e80941Smrg ralloc_free(mem_ctx); 887b8e80941Smrg return false; 888b8e80941Smrg } 889b8e80941Smrg 890b8e80941Smrg if 
(ish->compiled_once) { 891b8e80941Smrg iris_debug_recompile(ice, &nir->info, key->program_string_id, key); 892b8e80941Smrg } else { 893b8e80941Smrg ish->compiled_once = true; 894b8e80941Smrg } 895b8e80941Smrg 896b8e80941Smrg uint32_t *so_decls = 897b8e80941Smrg ice->vtbl.create_so_decl_list(&ish->stream_output, 898b8e80941Smrg &vue_prog_data->vue_map); 899b8e80941Smrg 900b8e80941Smrg 901b8e80941Smrg struct iris_compiled_shader *shader = 902b8e80941Smrg iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program, 903b8e80941Smrg prog_data, so_decls, system_values, num_system_values, 904b8e80941Smrg num_cbufs); 905b8e80941Smrg 906b8e80941Smrg ralloc_free(mem_ctx); 907b8e80941Smrg return shader; 908b8e80941Smrg} 909b8e80941Smrg 910b8e80941Smrg/** 911b8e80941Smrg * Update the current tessellation evaluation shader variant. 912b8e80941Smrg * 913b8e80941Smrg * Fill out the key, look in the cache, compile and bind if needed. 914b8e80941Smrg */ 915b8e80941Smrgstatic void 916b8e80941Smrgiris_update_compiled_tes(struct iris_context *ice) 917b8e80941Smrg{ 918b8e80941Smrg struct iris_uncompiled_shader *ish = 919b8e80941Smrg ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; 920b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 921b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 922b8e80941Smrg 923b8e80941Smrg struct brw_tes_prog_key key = { KEY_INIT(devinfo->gen) }; 924b8e80941Smrg get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read); 925b8e80941Smrg ice->vtbl.populate_tes_key(ice, &key); 926b8e80941Smrg 927b8e80941Smrg struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES]; 928b8e80941Smrg struct iris_compiled_shader *shader = 929b8e80941Smrg iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key); 930b8e80941Smrg 931b8e80941Smrg if (!shader) 932b8e80941Smrg shader = iris_compile_tes(ice, ish, &key); 933b8e80941Smrg 934b8e80941Smrg if (old != shader) { 935b8e80941Smrg 
ice->shaders.prog[IRIS_CACHE_TES] = shader; 936b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_TES | 937b8e80941Smrg IRIS_DIRTY_BINDINGS_TES | 938b8e80941Smrg IRIS_DIRTY_CONSTANTS_TES; 939b8e80941Smrg } 940b8e80941Smrg 941b8e80941Smrg /* TODO: Could compare and avoid flagging this. */ 942b8e80941Smrg const struct shader_info *tes_info = &ish->nir->info; 943b8e80941Smrg if (tes_info->system_values_read & (1ull << SYSTEM_VALUE_VERTICES_IN)) { 944b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_CONSTANTS_TES; 945b8e80941Smrg ice->state.shaders[MESA_SHADER_TESS_EVAL].cbuf0_needs_upload = true; 946b8e80941Smrg } 947b8e80941Smrg} 948b8e80941Smrg 949b8e80941Smrg/** 950b8e80941Smrg * Compile a geometry shader, and upload the assembly. 951b8e80941Smrg */ 952b8e80941Smrgstatic struct iris_compiled_shader * 953b8e80941Smrgiris_compile_gs(struct iris_context *ice, 954b8e80941Smrg struct iris_uncompiled_shader *ish, 955b8e80941Smrg const struct brw_gs_prog_key *key) 956b8e80941Smrg{ 957b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 958b8e80941Smrg const struct brw_compiler *compiler = screen->compiler; 959b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 960b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 961b8e80941Smrg struct brw_gs_prog_data *gs_prog_data = 962b8e80941Smrg rzalloc(mem_ctx, struct brw_gs_prog_data); 963b8e80941Smrg struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base; 964b8e80941Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 965b8e80941Smrg enum brw_param_builtin *system_values; 966b8e80941Smrg unsigned num_system_values; 967b8e80941Smrg unsigned num_cbufs; 968b8e80941Smrg 969b8e80941Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 970b8e80941Smrg 971b8e80941Smrg iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 972b8e80941Smrg &num_system_values, &num_cbufs); 973b8e80941Smrg 974b8e80941Smrg assign_common_binding_table_offsets(devinfo, nir, prog_data, 
0, 975b8e80941Smrg num_system_values, num_cbufs); 976b8e80941Smrg 977b8e80941Smrg brw_compute_vue_map(devinfo, 978b8e80941Smrg &vue_prog_data->vue_map, nir->info.outputs_written, 979b8e80941Smrg nir->info.separate_shader); 980b8e80941Smrg 981b8e80941Smrg char *error_str = NULL; 982b8e80941Smrg const unsigned *program = 983b8e80941Smrg brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir, 984b8e80941Smrg NULL, -1, &error_str); 985b8e80941Smrg if (program == NULL) { 986b8e80941Smrg dbg_printf("Failed to compile geometry shader: %s\n", error_str); 987b8e80941Smrg ralloc_free(mem_ctx); 988b8e80941Smrg return false; 989b8e80941Smrg } 990b8e80941Smrg 991b8e80941Smrg if (ish->compiled_once) { 992b8e80941Smrg iris_debug_recompile(ice, &nir->info, key->program_string_id, key); 993b8e80941Smrg } else { 994b8e80941Smrg ish->compiled_once = true; 995b8e80941Smrg } 996b8e80941Smrg 997b8e80941Smrg uint32_t *so_decls = 998b8e80941Smrg ice->vtbl.create_so_decl_list(&ish->stream_output, 999b8e80941Smrg &vue_prog_data->vue_map); 1000b8e80941Smrg 1001b8e80941Smrg struct iris_compiled_shader *shader = 1002b8e80941Smrg iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program, 1003b8e80941Smrg prog_data, so_decls, system_values, num_system_values, 1004b8e80941Smrg num_cbufs); 1005b8e80941Smrg 1006b8e80941Smrg ralloc_free(mem_ctx); 1007b8e80941Smrg return shader; 1008b8e80941Smrg} 1009b8e80941Smrg 1010b8e80941Smrg/** 1011b8e80941Smrg * Update the current geometry shader variant. 1012b8e80941Smrg * 1013b8e80941Smrg * Fill out the key, look in the cache, compile and bind if needed. 
1014b8e80941Smrg */ 1015b8e80941Smrgstatic void 1016b8e80941Smrgiris_update_compiled_gs(struct iris_context *ice) 1017b8e80941Smrg{ 1018b8e80941Smrg struct iris_uncompiled_shader *ish = 1019b8e80941Smrg ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]; 1020b8e80941Smrg struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS]; 1021b8e80941Smrg struct iris_compiled_shader *shader = NULL; 1022b8e80941Smrg 1023b8e80941Smrg if (ish) { 1024b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 1025b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 1026b8e80941Smrg struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) }; 1027b8e80941Smrg ice->vtbl.populate_gs_key(ice, &key); 1028b8e80941Smrg 1029b8e80941Smrg shader = 1030b8e80941Smrg iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key); 1031b8e80941Smrg 1032b8e80941Smrg if (!shader) 1033b8e80941Smrg shader = iris_compile_gs(ice, ish, &key); 1034b8e80941Smrg } 1035b8e80941Smrg 1036b8e80941Smrg if (old != shader) { 1037b8e80941Smrg ice->shaders.prog[IRIS_CACHE_GS] = shader; 1038b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_GS | 1039b8e80941Smrg IRIS_DIRTY_BINDINGS_GS | 1040b8e80941Smrg IRIS_DIRTY_CONSTANTS_GS; 1041b8e80941Smrg } 1042b8e80941Smrg} 1043b8e80941Smrg 1044b8e80941Smrg/** 1045b8e80941Smrg * Compile a fragment (pixel) shader, and upload the assembly. 
1046b8e80941Smrg */ 1047b8e80941Smrgstatic struct iris_compiled_shader * 1048b8e80941Smrgiris_compile_fs(struct iris_context *ice, 1049b8e80941Smrg struct iris_uncompiled_shader *ish, 1050b8e80941Smrg const struct brw_wm_prog_key *key, 1051b8e80941Smrg struct brw_vue_map *vue_map) 1052b8e80941Smrg{ 1053b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 1054b8e80941Smrg const struct brw_compiler *compiler = screen->compiler; 1055b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 1056b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 1057b8e80941Smrg struct brw_wm_prog_data *fs_prog_data = 1058b8e80941Smrg rzalloc(mem_ctx, struct brw_wm_prog_data); 1059b8e80941Smrg struct brw_stage_prog_data *prog_data = &fs_prog_data->base; 1060b8e80941Smrg enum brw_param_builtin *system_values; 1061b8e80941Smrg unsigned num_system_values; 1062b8e80941Smrg unsigned num_cbufs; 1063b8e80941Smrg 1064b8e80941Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 1065b8e80941Smrg 1066b8e80941Smrg if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0) 1067b8e80941Smrg prog_data->use_alt_mode = true; 1068b8e80941Smrg 1069b8e80941Smrg iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 1070b8e80941Smrg &num_system_values, &num_cbufs); 1071b8e80941Smrg 1072b8e80941Smrg assign_common_binding_table_offsets(devinfo, nir, prog_data, 1073b8e80941Smrg MAX2(key->nr_color_regions, 1), 1074b8e80941Smrg num_system_values, num_cbufs); 1075b8e80941Smrg char *error_str = NULL; 1076b8e80941Smrg const unsigned *program = 1077b8e80941Smrg brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data, 1078b8e80941Smrg nir, NULL, -1, -1, -1, true, false, vue_map, &error_str); 1079b8e80941Smrg if (program == NULL) { 1080b8e80941Smrg dbg_printf("Failed to compile fragment shader: %s\n", error_str); 1081b8e80941Smrg ralloc_free(mem_ctx); 1082b8e80941Smrg return false; 1083b8e80941Smrg } 1084b8e80941Smrg 1085b8e80941Smrg if 
(ish->compiled_once) { 1086b8e80941Smrg iris_debug_recompile(ice, &nir->info, key->program_string_id, key); 1087b8e80941Smrg } else { 1088b8e80941Smrg ish->compiled_once = true; 1089b8e80941Smrg } 1090b8e80941Smrg 1091b8e80941Smrg struct iris_compiled_shader *shader = 1092b8e80941Smrg iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program, 1093b8e80941Smrg prog_data, NULL, system_values, num_system_values, 1094b8e80941Smrg num_cbufs); 1095b8e80941Smrg 1096b8e80941Smrg ralloc_free(mem_ctx); 1097b8e80941Smrg return shader; 1098b8e80941Smrg} 1099b8e80941Smrg 1100b8e80941Smrg/** 1101b8e80941Smrg * Update the current fragment shader variant. 1102b8e80941Smrg * 1103b8e80941Smrg * Fill out the key, look in the cache, compile and bind if needed. 1104b8e80941Smrg */ 1105b8e80941Smrgstatic void 1106b8e80941Smrgiris_update_compiled_fs(struct iris_context *ice) 1107b8e80941Smrg{ 1108b8e80941Smrg struct iris_uncompiled_shader *ish = 1109b8e80941Smrg ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; 1110b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 1111b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 1112b8e80941Smrg struct brw_wm_prog_key key = { KEY_INIT(devinfo->gen) }; 1113b8e80941Smrg ice->vtbl.populate_fs_key(ice, &key); 1114b8e80941Smrg 1115b8e80941Smrg if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP)) 1116b8e80941Smrg key.input_slots_valid = ice->shaders.last_vue_map->slots_valid; 1117b8e80941Smrg 1118b8e80941Smrg struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS]; 1119b8e80941Smrg struct iris_compiled_shader *shader = 1120b8e80941Smrg iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key); 1121b8e80941Smrg 1122b8e80941Smrg if (!shader) 1123b8e80941Smrg shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map); 1124b8e80941Smrg 1125b8e80941Smrg if (old != shader) { 1126b8e80941Smrg // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE 1127b8e80941Smrg // toggles. 
might be able to avoid flagging SBE too. 1128b8e80941Smrg ice->shaders.prog[IRIS_CACHE_FS] = shader; 1129b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_FS | 1130b8e80941Smrg IRIS_DIRTY_BINDINGS_FS | 1131b8e80941Smrg IRIS_DIRTY_CONSTANTS_FS | 1132b8e80941Smrg IRIS_DIRTY_WM | 1133b8e80941Smrg IRIS_DIRTY_CLIP | 1134b8e80941Smrg IRIS_DIRTY_SBE; 1135b8e80941Smrg } 1136b8e80941Smrg} 1137b8e80941Smrg 1138b8e80941Smrg/** 1139b8e80941Smrg * Get the compiled shader for the last enabled geometry stage. 1140b8e80941Smrg * 1141b8e80941Smrg * This stage is the one which will feed stream output and the rasterizer. 1142b8e80941Smrg */ 1143b8e80941Smrgstatic gl_shader_stage 1144b8e80941Smrglast_vue_stage(struct iris_context *ice) 1145b8e80941Smrg{ 1146b8e80941Smrg if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) 1147b8e80941Smrg return MESA_SHADER_GEOMETRY; 1148b8e80941Smrg 1149b8e80941Smrg if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) 1150b8e80941Smrg return MESA_SHADER_TESS_EVAL; 1151b8e80941Smrg 1152b8e80941Smrg return MESA_SHADER_VERTEX; 1153b8e80941Smrg} 1154b8e80941Smrg 1155b8e80941Smrg/** 1156b8e80941Smrg * Update the last enabled stage's VUE map. 1157b8e80941Smrg * 1158b8e80941Smrg * When the shader feeding the rasterizer's output interface changes, we 1159b8e80941Smrg * need to re-emit various packets. 1160b8e80941Smrg */ 1161b8e80941Smrgstatic void 1162b8e80941Smrgupdate_last_vue_map(struct iris_context *ice, 1163b8e80941Smrg struct brw_stage_prog_data *prog_data) 1164b8e80941Smrg{ 1165b8e80941Smrg struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 1166b8e80941Smrg struct brw_vue_map *vue_map = &vue_prog_data->vue_map; 1167b8e80941Smrg struct brw_vue_map *old_map = ice->shaders.last_vue_map; 1168b8e80941Smrg const uint64_t changed_slots = 1169b8e80941Smrg (old_map ? 
old_map->slots_valid : 0ull) ^ vue_map->slots_valid; 1170b8e80941Smrg 1171b8e80941Smrg if (changed_slots & VARYING_BIT_VIEWPORT) { 1172b8e80941Smrg // XXX: could use ctx->Const.MaxViewports for old API efficiency 1173b8e80941Smrg ice->state.num_viewports = 1174b8e80941Smrg (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1; 1175b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_CLIP | 1176b8e80941Smrg IRIS_DIRTY_SF_CL_VIEWPORT | 1177b8e80941Smrg IRIS_DIRTY_CC_VIEWPORT | 1178b8e80941Smrg IRIS_DIRTY_SCISSOR_RECT | 1179b8e80941Smrg IRIS_DIRTY_UNCOMPILED_FS | 1180b8e80941Smrg ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP]; 1181b8e80941Smrg // XXX: CC_VIEWPORT? 1182b8e80941Smrg } 1183b8e80941Smrg 1184b8e80941Smrg if (changed_slots || (old_map && old_map->separate != vue_map->separate)) { 1185b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_SBE; 1186b8e80941Smrg } 1187b8e80941Smrg 1188b8e80941Smrg ice->shaders.last_vue_map = &vue_prog_data->vue_map; 1189b8e80941Smrg} 1190b8e80941Smrg 1191b8e80941Smrg/** 1192b8e80941Smrg * Get the prog_data for a given stage, or NULL if the stage is disabled. 1193b8e80941Smrg */ 1194b8e80941Smrgstatic struct brw_vue_prog_data * 1195b8e80941Smrgget_vue_prog_data(struct iris_context *ice, gl_shader_stage stage) 1196b8e80941Smrg{ 1197b8e80941Smrg if (!ice->shaders.prog[stage]) 1198b8e80941Smrg return NULL; 1199b8e80941Smrg 1200b8e80941Smrg return (void *) ice->shaders.prog[stage]->prog_data; 1201b8e80941Smrg} 1202b8e80941Smrg 1203b8e80941Smrg// XXX: iris_compiled_shaders are space-leaking :( 1204b8e80941Smrg// XXX: do remember to unbind them if deleting them. 1205b8e80941Smrg 1206b8e80941Smrg/** 1207b8e80941Smrg * Update the current shader variants for the given state. 1208b8e80941Smrg * 1209b8e80941Smrg * This should be called on every draw call to ensure that the correct 1210b8e80941Smrg * shaders are bound. It will also flag any dirty state triggered by 1211b8e80941Smrg * swapping out those shaders. 
1212b8e80941Smrg */ 1213b8e80941Smrgvoid 1214b8e80941Smrgiris_update_compiled_shaders(struct iris_context *ice) 1215b8e80941Smrg{ 1216b8e80941Smrg const uint64_t dirty = ice->state.dirty; 1217b8e80941Smrg 1218b8e80941Smrg struct brw_vue_prog_data *old_prog_datas[4]; 1219b8e80941Smrg if (!(dirty & IRIS_DIRTY_URB)) { 1220b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) 1221b8e80941Smrg old_prog_datas[i] = get_vue_prog_data(ice, i); 1222b8e80941Smrg } 1223b8e80941Smrg 1224b8e80941Smrg if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) { 1225b8e80941Smrg struct iris_uncompiled_shader *tes = 1226b8e80941Smrg ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; 1227b8e80941Smrg if (tes) { 1228b8e80941Smrg iris_update_compiled_tcs(ice); 1229b8e80941Smrg iris_update_compiled_tes(ice); 1230b8e80941Smrg } else { 1231b8e80941Smrg ice->shaders.prog[IRIS_CACHE_TCS] = NULL; 1232b8e80941Smrg ice->shaders.prog[IRIS_CACHE_TES] = NULL; 1233b8e80941Smrg ice->state.dirty |= 1234b8e80941Smrg IRIS_DIRTY_TCS | IRIS_DIRTY_TES | 1235b8e80941Smrg IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES | 1236b8e80941Smrg IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES; 1237b8e80941Smrg } 1238b8e80941Smrg } 1239b8e80941Smrg 1240b8e80941Smrg if (dirty & IRIS_DIRTY_UNCOMPILED_VS) 1241b8e80941Smrg iris_update_compiled_vs(ice); 1242b8e80941Smrg if (dirty & IRIS_DIRTY_UNCOMPILED_GS) 1243b8e80941Smrg iris_update_compiled_gs(ice); 1244b8e80941Smrg 1245b8e80941Smrg if (dirty & (IRIS_DIRTY_UNCOMPILED_GS | IRIS_DIRTY_UNCOMPILED_TES)) { 1246b8e80941Smrg const struct iris_compiled_shader *gs = 1247b8e80941Smrg ice->shaders.prog[MESA_SHADER_GEOMETRY]; 1248b8e80941Smrg const struct iris_compiled_shader *tes = 1249b8e80941Smrg ice->shaders.prog[MESA_SHADER_TESS_EVAL]; 1250b8e80941Smrg 1251b8e80941Smrg bool points_or_lines = false; 1252b8e80941Smrg 1253b8e80941Smrg if (gs) { 1254b8e80941Smrg const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data; 
1255b8e80941Smrg points_or_lines = 1256b8e80941Smrg gs_prog_data->output_topology == _3DPRIM_POINTLIST || 1257b8e80941Smrg gs_prog_data->output_topology == _3DPRIM_LINESTRIP; 1258b8e80941Smrg } else if (tes) { 1259b8e80941Smrg const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data; 1260b8e80941Smrg points_or_lines = 1261b8e80941Smrg tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE || 1262b8e80941Smrg tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT; 1263b8e80941Smrg } 1264b8e80941Smrg 1265b8e80941Smrg if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) { 1266b8e80941Smrg /* Outbound to XY Clip enables */ 1267b8e80941Smrg ice->shaders.output_topology_is_points_or_lines = points_or_lines; 1268b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_CLIP; 1269b8e80941Smrg } 1270b8e80941Smrg } 1271b8e80941Smrg 1272b8e80941Smrg gl_shader_stage last_stage = last_vue_stage(ice); 1273b8e80941Smrg struct iris_compiled_shader *shader = ice->shaders.prog[last_stage]; 1274b8e80941Smrg struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage]; 1275b8e80941Smrg update_last_vue_map(ice, shader->prog_data); 1276b8e80941Smrg if (ice->state.streamout != shader->streamout) { 1277b8e80941Smrg ice->state.streamout = shader->streamout; 1278b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT; 1279b8e80941Smrg } 1280b8e80941Smrg 1281b8e80941Smrg if (ice->state.streamout_active) { 1282b8e80941Smrg for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 1283b8e80941Smrg struct iris_stream_output_target *so = 1284b8e80941Smrg (void *) ice->state.so_target[i]; 1285b8e80941Smrg if (so) 1286b8e80941Smrg so->stride = ish->stream_output.stride[i]; 1287b8e80941Smrg } 1288b8e80941Smrg } 1289b8e80941Smrg 1290b8e80941Smrg if (dirty & IRIS_DIRTY_UNCOMPILED_FS) 1291b8e80941Smrg iris_update_compiled_fs(ice); 1292b8e80941Smrg 1293b8e80941Smrg /* Changing shader interfaces may require a URB configuration. 
*/ 1294b8e80941Smrg if (!(dirty & IRIS_DIRTY_URB)) { 1295b8e80941Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 1296b8e80941Smrg struct brw_vue_prog_data *old = old_prog_datas[i]; 1297b8e80941Smrg struct brw_vue_prog_data *new = get_vue_prog_data(ice, i); 1298b8e80941Smrg if (!!old != !!new || 1299b8e80941Smrg (new && new->urb_entry_size != old->urb_entry_size)) { 1300b8e80941Smrg ice->state.dirty |= IRIS_DIRTY_URB; 1301b8e80941Smrg break; 1302b8e80941Smrg } 1303b8e80941Smrg } 1304b8e80941Smrg } 1305b8e80941Smrg} 1306b8e80941Smrg 1307b8e80941Smrgstatic struct iris_compiled_shader * 1308b8e80941Smrgiris_compile_cs(struct iris_context *ice, 1309b8e80941Smrg struct iris_uncompiled_shader *ish, 1310b8e80941Smrg const struct brw_cs_prog_key *key) 1311b8e80941Smrg{ 1312b8e80941Smrg struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; 1313b8e80941Smrg const struct brw_compiler *compiler = screen->compiler; 1314b8e80941Smrg const struct gen_device_info *devinfo = &screen->devinfo; 1315b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 1316b8e80941Smrg struct brw_cs_prog_data *cs_prog_data = 1317b8e80941Smrg rzalloc(mem_ctx, struct brw_cs_prog_data); 1318b8e80941Smrg struct brw_stage_prog_data *prog_data = &cs_prog_data->base; 1319b8e80941Smrg enum brw_param_builtin *system_values; 1320b8e80941Smrg unsigned num_system_values; 1321b8e80941Smrg unsigned num_cbufs; 1322b8e80941Smrg 1323b8e80941Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 1324b8e80941Smrg 1325b8e80941Smrg cs_prog_data->binding_table.work_groups_start = 0; 1326b8e80941Smrg 1327b8e80941Smrg prog_data->total_shared = nir->info.cs.shared_size; 1328b8e80941Smrg 1329b8e80941Smrg iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 1330b8e80941Smrg &num_system_values, &num_cbufs); 1331b8e80941Smrg 1332b8e80941Smrg assign_common_binding_table_offsets(devinfo, nir, prog_data, 1, 1333b8e80941Smrg num_system_values, num_cbufs); 1334b8e80941Smrg 
   char *error_str = NULL;
   const unsigned *program =
      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
                     nir, -1, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      /* NOTE(review): this function returns a pointer (see "return shader"
       * below); "false" works only because false == 0 converts to a null
       * pointer — consider returning NULL for clarity.
       */
      return false;
   }

   /* Report recompiles (new variants of an already-compiled shader) to help
    * track down state-dependent recompilation overhead.
    */
   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, key->program_string_id, key);
   } else {
      ish->compiled_once = true;
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         num_cbufs);

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current compute shader variant.
 *
 * Fills out the shader key from current state, looks for a matching
 * variant in the program cache, compiles one if missing, and flags the
 * relevant dirty bits when the bound variant changes.
 */
void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };
   ice->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);

   if (!shader)
      shader = iris_compile_cs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_CS] = shader;
      ice->state.dirty |= IRIS_DIRTY_CS |
                          IRIS_DIRTY_BINDINGS_CS |
                          IRIS_DIRTY_CONSTANTS_CS;
   }
}

/**
 * Fill out the push constant buffer for a compute shader.
 *
 * The only push constant uploaded is the subgroup ID: each thread's slot
 * (the asserts pin a single per-thread dword, at a stride of 8 dwords)
 * receives its own thread index.
 */
void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               uint32_t *dst)
{
   assert(cs_prog_data->push.total.size > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
   for (unsigned t = 0; t < cs_prog_data->threads; t++)
      dst[8 * t] = t;
}

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 */
struct iris_bo *
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct gen_device_info *devinfo = &screen->devinfo;

   /* Index into the scratch_bos cache by size class.  NOTE(review): this
    * assumes per_thread_scratch is a power of two >= 1KB (2^10), so that
    * ffs() - 11 yields a small non-negative index — confirm with callers.
    */
   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < (1 << 16));

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
    *     must allocate scratch space enough so that each slice has 4
    *     slices allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well.  This is not currently documented at all.
    *
    * This hack is no longer necessary on Gen11+.
    */
   unsigned subslice_total = screen->subslice_total;
   if (devinfo->gen < 11)
      subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
      /* Maximum number of threads that may need scratch, per stage. */
      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
   }

   return *bop;
}

/* ------------------------------------------------------------------- */

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct pipe_context *ctx,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   nir = brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   return ish;
}

/**
 * Translate TGSI to NIR if necessary, then create the uncompiled
 * shader CSO via iris_create_uncompiled_shader().
 */
static struct iris_uncompiled_shader *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   struct nir_shader *nir;

   if (state->type == PIPE_SHADER_IR_TGSI)
      nir = tgsi_to_nir(state->tokens, ctx->screen);
   else
      nir = state->ir.nir;

   return iris_create_uncompiled_shader(ctx, nir, &state->stream_output);
}

static void *
iris_create_vs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   /* User clip planes: shaders without gl_ClipDistance depend on the
    * rasterizer CSO's clip plane state.
    */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };

      iris_compile_vs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tcs_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const unsigned _GL_TRIANGLES = 0x0004;
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tcs_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: make sure the linker fills this out from the TES...
         .tes_primitive_mode =
            info->tess.primitive_mode ? info->tess.primitive_mode
                                      : _GL_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      iris_compile_tcs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_tes_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   // XXX: NOS?

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_tes_prog_key key = {
         KEY_INIT(devinfo->gen),
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      iris_compile_tes(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   // XXX: NOS?

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };

      iris_compile_gs(ice, ish, &key);
   }

   return ish;
}

static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   /* Fragment shader keys depend on all of this non-orthogonal state. */
   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << IRIS_NOS_RASTERIZER) |
               (1ull << IRIS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      /* Depth/stencil/sample-mask outputs are not color render targets. */
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_wm_prog_key key = {
         KEY_INIT(devinfo->gen),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = true,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      iris_compile_fs(ice, ish, &key, NULL);
   }

   return ish;
}

static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      const struct gen_device_info *devinfo = &screen->devinfo;
      struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };

      iris_compile_cs(ice, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.  If the shader being deleted is the
 * currently bound one for its stage, it is unbound and the stage's
 * uncompiled-shader dirty bit is flagged.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
   }

   ralloc_free(ish->nir);
   free(ish);
}

static void
iris_delete_vs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_delete_tcs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_delete_tes_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_delete_gs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_delete_fs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_delete_cs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_state(struct iris_context *ice,
           struct iris_uncompiled_shader *ish,
           gl_shader_stage stage)
{
   uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;

   /* If the number of textures used changes, sampler states need
    * re-uploading for this stage.
    */
   if ((old_info ? util_last_bit(old_info->textures_used) : 0) !=
       (new_info ? util_last_bit(new_info->textures_used) : 0)) {
      ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.dirty |= dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.dirty_for_nos[i] |= dirty_bit;
      else
         ice->state.dirty_for_nos[i] &= ~dirty_bit;
   }
}

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * Install the create/delete/bind shader-state driver hooks on the
 * pipe_context.
 */
void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state  = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state  = iris_create_gs_state;
   ctx->create_fs_state  = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state  = iris_delete_vs_state;
   ctx->delete_tcs_state = iris_delete_tcs_state;
   ctx->delete_tes_state = iris_delete_tes_state;
   ctx->delete_gs_state  = iris_delete_gs_state;
   ctx->delete_fs_state  = iris_delete_fs_state;
   ctx->delete_compute_state = iris_delete_cs_state;

   ctx->bind_vs_state  = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state  = iris_bind_gs_state;
   ctx->bind_fs_state  = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}