17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2017 Intel Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice shall be included 127ec681f3Smrg * in all copies or substantial portions of the Software. 137ec681f3Smrg * 147ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 157ec681f3Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 167ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 177ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 187ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 197ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 207ec681f3Smrg * DEALINGS IN THE SOFTWARE. 217ec681f3Smrg */ 227ec681f3Smrg 237ec681f3Smrg/** 247ec681f3Smrg * @file crocus_program.c 257ec681f3Smrg * 267ec681f3Smrg * This file contains the driver interface for compiling shaders. 277ec681f3Smrg * 287ec681f3Smrg * See crocus_program_cache.c for the in-memory program cache where the 297ec681f3Smrg * compiled shaders are stored. 307ec681f3Smrg */ 317ec681f3Smrg 327ec681f3Smrg#include <stdio.h> 337ec681f3Smrg#include <errno.h> 347ec681f3Smrg#include "pipe/p_defines.h" 357ec681f3Smrg#include "pipe/p_state.h" 367ec681f3Smrg#include "pipe/p_context.h" 377ec681f3Smrg#include "pipe/p_screen.h" 387ec681f3Smrg#include "util/u_atomic.h" 397ec681f3Smrg#include "util/u_upload_mgr.h" 407ec681f3Smrg#include "util/debug.h" 417ec681f3Smrg#include "util/u_prim.h" 427ec681f3Smrg#include "compiler/nir/nir.h" 437ec681f3Smrg#include "compiler/nir/nir_builder.h" 447ec681f3Smrg#include "compiler/nir/nir_serialize.h" 457ec681f3Smrg#include "intel/compiler/brw_compiler.h" 467ec681f3Smrg#include "intel/compiler/brw_nir.h" 477ec681f3Smrg#include "crocus_context.h" 487ec681f3Smrg#include "nir/tgsi_to_nir.h" 497ec681f3Smrg 507ec681f3Smrg#define KEY_INIT_NO_ID() \ 517ec681f3Smrg .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \ 527ec681f3Smrg .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \ 537ec681f3Smrg .base.tex.compressed_multisample_layout_mask = ~0 547ec681f3Smrg#define KEY_INIT() .base.program_string_id = ish->program_id, KEY_INIT_NO_ID() 557ec681f3Smrg 567ec681f3Smrgstatic void 577ec681f3Smrgcrocus_sanitize_tex_key(struct brw_sampler_prog_key_data *key) 587ec681f3Smrg{ 597ec681f3Smrg key->gather_channel_quirk_mask = 0; 607ec681f3Smrg for (unsigned s = 0; s < MAX_SAMPLERS; s++) { 617ec681f3Smrg key->swizzles[s] = SWIZZLE_NOOP; 627ec681f3Smrg key->gfx6_gather_wa[s] = 0; 637ec681f3Smrg } 647ec681f3Smrg} 657ec681f3Smrg 667ec681f3Smrgstatic uint32_t 677ec681f3Smrgcrocus_get_texture_swizzle(const struct crocus_context *ice, 687ec681f3Smrg const struct crocus_sampler_view *t) 697ec681f3Smrg{ 707ec681f3Smrg uint32_t swiz = 0; 717ec681f3Smrg 727ec681f3Smrg for (int i = 0; i < 4; i++) { 737ec681f3Smrg swiz |= t->swizzle[i] << (i * 3); 747ec681f3Smrg } 757ec681f3Smrg return swiz; 767ec681f3Smrg} 777ec681f3Smrg 787ec681f3Smrgstatic inline bool can_push_ubo(const struct intel_device_info *devinfo) 797ec681f3Smrg{ 807ec681f3Smrg /* push works for everyone except SNB at the moment */ 817ec681f3Smrg return devinfo->ver != 6; 827ec681f3Smrg} 837ec681f3Smrg 847ec681f3Smrgstatic uint8_t 857ec681f3Smrggfx6_gather_workaround(enum pipe_format pformat) 867ec681f3Smrg{ 877ec681f3Smrg switch (pformat) { 887ec681f3Smrg case PIPE_FORMAT_R8_SINT: return WA_SIGN | WA_8BIT; 897ec681f3Smrg case PIPE_FORMAT_R8_UINT: return WA_8BIT; 907ec681f3Smrg case PIPE_FORMAT_R16_SINT: return WA_SIGN | WA_16BIT; 917ec681f3Smrg case PIPE_FORMAT_R16_UINT: return WA_16BIT; 927ec681f3Smrg default: 937ec681f3Smrg /* Note that even though PIPE_FORMAT_R32_SINT and 947ec681f3Smrg * PIPE_FORMAT_R32_UINThave format overrides in 957ec681f3Smrg * the surface state, there is no shader w/a required. 967ec681f3Smrg */ 977ec681f3Smrg return 0; 987ec681f3Smrg } 997ec681f3Smrg} 1007ec681f3Smrg 1017ec681f3Smrgstatic const unsigned crocus_gfx6_swizzle_for_offset[4] = { 1027ec681f3Smrg BRW_SWIZZLE4(0, 1, 2, 3), 1037ec681f3Smrg BRW_SWIZZLE4(1, 2, 3, 3), 1047ec681f3Smrg BRW_SWIZZLE4(2, 3, 3, 3), 1057ec681f3Smrg BRW_SWIZZLE4(3, 3, 3, 3) 1067ec681f3Smrg}; 1077ec681f3Smrg 1087ec681f3Smrgstatic void 1097ec681f3Smrggfx6_gs_xfb_setup(const struct pipe_stream_output_info *so_info, 1107ec681f3Smrg struct brw_gs_prog_data *gs_prog_data) 1117ec681f3Smrg{ 1127ec681f3Smrg /* Make sure that the VUE slots won't overflow the unsigned chars in 1137ec681f3Smrg * prog_data->transform_feedback_bindings[]. 1147ec681f3Smrg */ 1157ec681f3Smrg STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); 1167ec681f3Smrg 1177ec681f3Smrg /* Make sure that we don't need more binding table entries than we've 1187ec681f3Smrg * set aside for use in transform feedback. (We shouldn't, since we 1197ec681f3Smrg * set aside enough binding table entries to have one per component). 1207ec681f3Smrg */ 1217ec681f3Smrg assert(so_info->num_outputs <= BRW_MAX_SOL_BINDINGS); 1227ec681f3Smrg 1237ec681f3Smrg gs_prog_data->num_transform_feedback_bindings = so_info->num_outputs; 1247ec681f3Smrg for (unsigned i = 0; i < so_info->num_outputs; i++) { 1257ec681f3Smrg gs_prog_data->transform_feedback_bindings[i] = 1267ec681f3Smrg so_info->output[i].register_index; 1277ec681f3Smrg gs_prog_data->transform_feedback_swizzles[i] = 1287ec681f3Smrg crocus_gfx6_swizzle_for_offset[so_info->output[i].start_component]; 1297ec681f3Smrg } 1307ec681f3Smrg} 1317ec681f3Smrg 1327ec681f3Smrgstatic void 1337ec681f3Smrggfx6_ff_gs_xfb_setup(const struct pipe_stream_output_info *so_info, 1347ec681f3Smrg struct brw_ff_gs_prog_key *key) 1357ec681f3Smrg{ 1367ec681f3Smrg key->num_transform_feedback_bindings = so_info->num_outputs; 1377ec681f3Smrg for (unsigned i = 0; i < so_info->num_outputs; i++) { 1387ec681f3Smrg key->transform_feedback_bindings[i] = 1397ec681f3Smrg so_info->output[i].register_index; 1407ec681f3Smrg key->transform_feedback_swizzles[i] = 1417ec681f3Smrg crocus_gfx6_swizzle_for_offset[so_info->output[i].start_component]; 1427ec681f3Smrg } 1437ec681f3Smrg} 1447ec681f3Smrg 1457ec681f3Smrgstatic void 1467ec681f3Smrgcrocus_populate_sampler_prog_key_data(struct crocus_context *ice, 1477ec681f3Smrg const struct intel_device_info *devinfo, 1487ec681f3Smrg gl_shader_stage stage, 1497ec681f3Smrg struct crocus_uncompiled_shader *ish, 1507ec681f3Smrg bool uses_texture_gather, 1517ec681f3Smrg struct brw_sampler_prog_key_data *key) 1527ec681f3Smrg{ 1537ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 1547ec681f3Smrg uint32_t mask = ish->nir->info.textures_used[0]; 1557ec681f3Smrg 1567ec681f3Smrg while (mask) { 1577ec681f3Smrg const int s = u_bit_scan(&mask); 1587ec681f3Smrg 1597ec681f3Smrg struct crocus_sampler_view *texture = ice->state.shaders[stage].textures[s]; 1607ec681f3Smrg key->swizzles[s] = SWIZZLE_NOOP; 1617ec681f3Smrg key->scale_factors[s] = 0.0f; 1627ec681f3Smrg 1637ec681f3Smrg if (!texture) 1647ec681f3Smrg continue; 1657ec681f3Smrg if (texture->base.target == PIPE_BUFFER) 1667ec681f3Smrg continue; 1677ec681f3Smrg if (devinfo->verx10 < 75) { 1687ec681f3Smrg key->swizzles[s] = crocus_get_texture_swizzle(ice, texture); 1697ec681f3Smrg } 1707ec681f3Smrg 1717ec681f3Smrg screen->vtbl.fill_clamp_mask(ice->state.shaders[stage].samplers[s], s, key->gl_clamp_mask); 1727ec681f3Smrg 1737ec681f3Smrg /* gather4 for RG32* is broken in multiple ways on Gen7. */ 1747ec681f3Smrg if (devinfo->ver == 7 && uses_texture_gather) { 1757ec681f3Smrg switch (texture->base.format) { 1767ec681f3Smrg case PIPE_FORMAT_R32G32_UINT: 1777ec681f3Smrg case PIPE_FORMAT_R32G32_SINT: { 1787ec681f3Smrg /* We have to override the format to R32G32_FLOAT_LD. 1797ec681f3Smrg * This means that SCS_ALPHA and SCS_ONE will return 0x3f8 1807ec681f3Smrg * (1.0) rather than integer 1. This needs shader hacks. 1817ec681f3Smrg * 1827ec681f3Smrg * On Ivybridge, we whack W (alpha) to ONE in our key's 1837ec681f3Smrg * swizzle. On Haswell, we look at the original texture 1847ec681f3Smrg * swizzle, and use XYZW with channels overridden to ONE, 1857ec681f3Smrg * leaving normal texture swizzling to SCS. 1867ec681f3Smrg */ 1877ec681f3Smrg unsigned src_swizzle = key->swizzles[s]; 1887ec681f3Smrg for (int i = 0; i < 4; i++) { 1897ec681f3Smrg unsigned src_comp = GET_SWZ(src_swizzle, i); 1907ec681f3Smrg if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) { 1917ec681f3Smrg key->swizzles[i] &= ~(0x7 << (3 * i)); 1927ec681f3Smrg key->swizzles[i] |= SWIZZLE_ONE << (3 * i); 1937ec681f3Smrg } 1947ec681f3Smrg } 1957ec681f3Smrg } 1967ec681f3Smrg FALLTHROUGH; 1977ec681f3Smrg case PIPE_FORMAT_R32G32_FLOAT: 1987ec681f3Smrg /* The channel select for green doesn't work - we have to 1997ec681f3Smrg * request blue. Haswell can use SCS for this, but Ivybridge 2007ec681f3Smrg * needs a shader workaround. 2017ec681f3Smrg */ 2027ec681f3Smrg if (devinfo->verx10 < 75) 2037ec681f3Smrg key->gather_channel_quirk_mask |= 1 << s; 2047ec681f3Smrg break; 2057ec681f3Smrg default: 2067ec681f3Smrg break; 2077ec681f3Smrg } 2087ec681f3Smrg } 2097ec681f3Smrg if (devinfo->ver == 6 && uses_texture_gather) { 2107ec681f3Smrg key->gfx6_gather_wa[s] = gfx6_gather_workaround(texture->base.format); 2117ec681f3Smrg } 2127ec681f3Smrg } 2137ec681f3Smrg} 2147ec681f3Smrg 2157ec681f3Smrgstatic void 2167ec681f3Smrgcrocus_lower_swizzles(struct nir_shader *nir, 2177ec681f3Smrg const struct brw_sampler_prog_key_data *key_tex) 2187ec681f3Smrg{ 2197ec681f3Smrg struct nir_lower_tex_options tex_options = { 0 }; 2207ec681f3Smrg uint32_t mask = nir->info.textures_used[0]; 2217ec681f3Smrg 2227ec681f3Smrg while (mask) { 2237ec681f3Smrg const int s = u_bit_scan(&mask); 2247ec681f3Smrg 2257ec681f3Smrg if (key_tex->swizzles[s] == SWIZZLE_NOOP) 2267ec681f3Smrg continue; 2277ec681f3Smrg 2287ec681f3Smrg tex_options.swizzle_result |= (1 << s); 2297ec681f3Smrg for (unsigned c = 0; c < 4; c++) 2307ec681f3Smrg tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); 2317ec681f3Smrg } 2327ec681f3Smrg if (tex_options.swizzle_result) 2337ec681f3Smrg nir_lower_tex(nir, &tex_options); 2347ec681f3Smrg} 2357ec681f3Smrg 2367ec681f3Smrgstatic unsigned 2377ec681f3Smrgget_new_program_id(struct crocus_screen *screen) 2387ec681f3Smrg{ 2397ec681f3Smrg return p_atomic_inc_return(&screen->program_id); 2407ec681f3Smrg} 2417ec681f3Smrg 2427ec681f3Smrgstatic nir_ssa_def * 2437ec681f3Smrgget_aoa_deref_offset(nir_builder *b, 2447ec681f3Smrg nir_deref_instr *deref, 2457ec681f3Smrg unsigned elem_size) 2467ec681f3Smrg{ 2477ec681f3Smrg unsigned array_size = elem_size; 2487ec681f3Smrg nir_ssa_def *offset = nir_imm_int(b, 0); 2497ec681f3Smrg 2507ec681f3Smrg while (deref->deref_type != nir_deref_type_var) { 2517ec681f3Smrg assert(deref->deref_type == nir_deref_type_array); 2527ec681f3Smrg 2537ec681f3Smrg /* This level's element size is the previous level's array size */ 2547ec681f3Smrg nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 2557ec681f3Smrg assert(deref->arr.index.ssa); 2567ec681f3Smrg offset = nir_iadd(b, offset, 2577ec681f3Smrg nir_imul(b, index, nir_imm_int(b, array_size))); 2587ec681f3Smrg 2597ec681f3Smrg deref = nir_deref_instr_parent(deref); 2607ec681f3Smrg assert(glsl_type_is_array(deref->type)); 2617ec681f3Smrg array_size *= glsl_get_length(deref->type); 2627ec681f3Smrg } 2637ec681f3Smrg 2647ec681f3Smrg /* Accessing an invalid surface index with the dataport can result in a 2657ec681f3Smrg * hang. According to the spec "if the index used to select an individual 2667ec681f3Smrg * element is negative or greater than or equal to the size of the array, 2677ec681f3Smrg * the results of the operation are undefined but may not lead to 2687ec681f3Smrg * termination" -- which is one of the possible outcomes of the hang. 2697ec681f3Smrg * Clamp the index to prevent access outside of the array bounds. 2707ec681f3Smrg */ 2717ec681f3Smrg return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size)); 2727ec681f3Smrg} 2737ec681f3Smrg 2747ec681f3Smrgstatic void 2757ec681f3Smrgcrocus_lower_storage_image_derefs(nir_shader *nir) 2767ec681f3Smrg{ 2777ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2787ec681f3Smrg 2797ec681f3Smrg nir_builder b; 2807ec681f3Smrg nir_builder_init(&b, impl); 2817ec681f3Smrg 2827ec681f3Smrg nir_foreach_block(block, impl) { 2837ec681f3Smrg nir_foreach_instr_safe(instr, block) { 2847ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 2857ec681f3Smrg continue; 2867ec681f3Smrg 2877ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2887ec681f3Smrg switch (intrin->intrinsic) { 2897ec681f3Smrg case nir_intrinsic_image_deref_load: 2907ec681f3Smrg case nir_intrinsic_image_deref_store: 2917ec681f3Smrg case nir_intrinsic_image_deref_atomic_add: 2927ec681f3Smrg case nir_intrinsic_image_deref_atomic_imin: 2937ec681f3Smrg case nir_intrinsic_image_deref_atomic_umin: 2947ec681f3Smrg case nir_intrinsic_image_deref_atomic_imax: 2957ec681f3Smrg case nir_intrinsic_image_deref_atomic_umax: 2967ec681f3Smrg case nir_intrinsic_image_deref_atomic_and: 2977ec681f3Smrg case nir_intrinsic_image_deref_atomic_or: 2987ec681f3Smrg case nir_intrinsic_image_deref_atomic_xor: 2997ec681f3Smrg case nir_intrinsic_image_deref_atomic_exchange: 3007ec681f3Smrg case nir_intrinsic_image_deref_atomic_comp_swap: 3017ec681f3Smrg case nir_intrinsic_image_deref_size: 3027ec681f3Smrg case nir_intrinsic_image_deref_samples: 3037ec681f3Smrg case nir_intrinsic_image_deref_load_raw_intel: 3047ec681f3Smrg case nir_intrinsic_image_deref_store_raw_intel: { 3057ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 3067ec681f3Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 3077ec681f3Smrg 3087ec681f3Smrg b.cursor = nir_before_instr(&intrin->instr); 3097ec681f3Smrg nir_ssa_def *index = 3107ec681f3Smrg nir_iadd(&b, nir_imm_int(&b, var->data.driver_location), 3117ec681f3Smrg get_aoa_deref_offset(&b, deref, 1)); 3127ec681f3Smrg nir_rewrite_image_intrinsic(intrin, index, false); 3137ec681f3Smrg break; 3147ec681f3Smrg } 3157ec681f3Smrg 3167ec681f3Smrg default: 3177ec681f3Smrg break; 3187ec681f3Smrg } 3197ec681f3Smrg } 3207ec681f3Smrg } 3217ec681f3Smrg} 3227ec681f3Smrg 3237ec681f3Smrg// XXX: need unify_interfaces() at link time... 3247ec681f3Smrg 3257ec681f3Smrg/** 3267ec681f3Smrg * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag. 3277ec681f3Smrg */ 3287ec681f3Smrgstatic bool 3297ec681f3Smrgcrocus_fix_edge_flags(nir_shader *nir) 3307ec681f3Smrg{ 3317ec681f3Smrg if (nir->info.stage != MESA_SHADER_VERTEX) { 3327ec681f3Smrg nir_shader_preserve_all_metadata(nir); 3337ec681f3Smrg return false; 3347ec681f3Smrg } 3357ec681f3Smrg 3367ec681f3Smrg nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out, 3377ec681f3Smrg VARYING_SLOT_EDGE); 3387ec681f3Smrg if (!var) { 3397ec681f3Smrg nir_shader_preserve_all_metadata(nir); 3407ec681f3Smrg return false; 3417ec681f3Smrg } 3427ec681f3Smrg 3437ec681f3Smrg var->data.mode = nir_var_shader_temp; 3447ec681f3Smrg nir->info.outputs_written &= ~VARYING_BIT_EDGE; 3457ec681f3Smrg nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG; 3467ec681f3Smrg nir_fixup_deref_modes(nir); 3477ec681f3Smrg 3487ec681f3Smrg nir_foreach_function(f, nir) { 3497ec681f3Smrg if (f->impl) { 3507ec681f3Smrg nir_metadata_preserve(f->impl, nir_metadata_block_index | 3517ec681f3Smrg nir_metadata_dominance | 3527ec681f3Smrg nir_metadata_live_ssa_defs | 3537ec681f3Smrg nir_metadata_loop_analysis); 3547ec681f3Smrg } else { 3557ec681f3Smrg nir_metadata_preserve(f->impl, nir_metadata_all); 3567ec681f3Smrg } 3577ec681f3Smrg } 3587ec681f3Smrg 3597ec681f3Smrg return true; 3607ec681f3Smrg} 3617ec681f3Smrg 3627ec681f3Smrg/** 3637ec681f3Smrg * Fix an uncompiled shader's stream output info. 3647ec681f3Smrg * 3657ec681f3Smrg * Core Gallium stores output->register_index as a "slot" number, where 3667ec681f3Smrg * slots are assigned consecutively to all outputs in info->outputs_written. 3677ec681f3Smrg * This naive packing of outputs doesn't work for us - we too have slots, 3687ec681f3Smrg * but the layout is defined by the VUE map, which we won't have until we 3697ec681f3Smrg * compile a specific shader variant. So, we remap these and simply store 3707ec681f3Smrg * VARYING_SLOT_* in our copy's output->register_index fields. 3717ec681f3Smrg * 3727ec681f3Smrg * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W 3737ec681f3Smrg * components of our VUE header. See brw_vue_map.c for the layout. 3747ec681f3Smrg */ 3757ec681f3Smrgstatic void 3767ec681f3Smrgupdate_so_info(struct pipe_stream_output_info *so_info, 3777ec681f3Smrg uint64_t outputs_written) 3787ec681f3Smrg{ 3797ec681f3Smrg uint8_t reverse_map[64] = {}; 3807ec681f3Smrg unsigned slot = 0; 3817ec681f3Smrg while (outputs_written) { 3827ec681f3Smrg reverse_map[slot++] = u_bit_scan64(&outputs_written); 3837ec681f3Smrg } 3847ec681f3Smrg 3857ec681f3Smrg for (unsigned i = 0; i < so_info->num_outputs; i++) { 3867ec681f3Smrg struct pipe_stream_output *output = &so_info->output[i]; 3877ec681f3Smrg 3887ec681f3Smrg /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ 3897ec681f3Smrg output->register_index = reverse_map[output->register_index]; 3907ec681f3Smrg 3917ec681f3Smrg /* The VUE header contains three scalar fields packed together: 3927ec681f3Smrg * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w 3937ec681f3Smrg * - gl_Layer is stored in VARYING_SLOT_PSIZ.y 3947ec681f3Smrg * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z 3957ec681f3Smrg */ 3967ec681f3Smrg switch (output->register_index) { 3977ec681f3Smrg case VARYING_SLOT_LAYER: 3987ec681f3Smrg assert(output->num_components == 1); 3997ec681f3Smrg output->register_index = VARYING_SLOT_PSIZ; 4007ec681f3Smrg output->start_component = 1; 4017ec681f3Smrg break; 4027ec681f3Smrg case VARYING_SLOT_VIEWPORT: 4037ec681f3Smrg assert(output->num_components == 1); 4047ec681f3Smrg output->register_index = VARYING_SLOT_PSIZ; 4057ec681f3Smrg output->start_component = 2; 4067ec681f3Smrg break; 4077ec681f3Smrg case VARYING_SLOT_PSIZ: 4087ec681f3Smrg assert(output->num_components == 1); 4097ec681f3Smrg output->start_component = 3; 4107ec681f3Smrg break; 4117ec681f3Smrg } 4127ec681f3Smrg 4137ec681f3Smrg //info->outputs_written |= 1ull << output->register_index; 4147ec681f3Smrg } 4157ec681f3Smrg} 4167ec681f3Smrg 4177ec681f3Smrgstatic void 4187ec681f3Smrgsetup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx, 4197ec681f3Smrg unsigned offset, unsigned n) 4207ec681f3Smrg{ 4217ec681f3Smrg assert(offset % sizeof(uint32_t) == 0); 4227ec681f3Smrg 4237ec681f3Smrg for (unsigned i = 0; i < n; ++i) 4247ec681f3Smrg sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); 4257ec681f3Smrg 4267ec681f3Smrg for (unsigned i = n; i < 4; ++i) 4277ec681f3Smrg sysvals[i] = BRW_PARAM_BUILTIN_ZERO; 4287ec681f3Smrg} 4297ec681f3Smrg 4307ec681f3Smrg/** 4317ec681f3Smrg * Associate NIR uniform variables with the prog_data->param[] mechanism 4327ec681f3Smrg * used by the backend. Also, decide which UBOs we'd like to push in an 4337ec681f3Smrg * ideal situation (though the backend can reduce this). 4347ec681f3Smrg */ 4357ec681f3Smrgstatic void 4367ec681f3Smrgcrocus_setup_uniforms(const struct brw_compiler *compiler, 4377ec681f3Smrg void *mem_ctx, 4387ec681f3Smrg nir_shader *nir, 4397ec681f3Smrg struct brw_stage_prog_data *prog_data, 4407ec681f3Smrg enum brw_param_builtin **out_system_values, 4417ec681f3Smrg unsigned *out_num_system_values, 4427ec681f3Smrg unsigned *out_num_cbufs) 4437ec681f3Smrg{ 4447ec681f3Smrg UNUSED const struct intel_device_info *devinfo = compiler->devinfo; 4457ec681f3Smrg 4467ec681f3Smrg const unsigned CROCUS_MAX_SYSTEM_VALUES = 4477ec681f3Smrg PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE; 4487ec681f3Smrg enum brw_param_builtin *system_values = 4497ec681f3Smrg rzalloc_array(mem_ctx, enum brw_param_builtin, CROCUS_MAX_SYSTEM_VALUES); 4507ec681f3Smrg unsigned num_system_values = 0; 4517ec681f3Smrg 4527ec681f3Smrg unsigned patch_vert_idx = -1; 4537ec681f3Smrg unsigned ucp_idx[CROCUS_MAX_CLIP_PLANES]; 4547ec681f3Smrg unsigned img_idx[PIPE_MAX_SHADER_IMAGES]; 4557ec681f3Smrg unsigned variable_group_size_idx = -1; 4567ec681f3Smrg memset(ucp_idx, -1, sizeof(ucp_idx)); 4577ec681f3Smrg memset(img_idx, -1, sizeof(img_idx)); 4587ec681f3Smrg 4597ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 4607ec681f3Smrg 4617ec681f3Smrg nir_builder b; 4627ec681f3Smrg nir_builder_init(&b, impl); 4637ec681f3Smrg 4647ec681f3Smrg b.cursor = nir_before_block(nir_start_block(impl)); 4657ec681f3Smrg nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32); 4667ec681f3Smrg nir_ssa_def *temp_const_ubo_name = NULL; 4677ec681f3Smrg 4687ec681f3Smrg /* Turn system value intrinsics into uniforms */ 4697ec681f3Smrg nir_foreach_block(block, impl) { 4707ec681f3Smrg nir_foreach_instr_safe(instr, block) { 4717ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 4727ec681f3Smrg continue; 4737ec681f3Smrg 4747ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 4757ec681f3Smrg nir_ssa_def *offset; 4767ec681f3Smrg 4777ec681f3Smrg switch (intrin->intrinsic) { 4787ec681f3Smrg case nir_intrinsic_load_constant: { 4797ec681f3Smrg /* This one is special because it reads from the shader constant 4807ec681f3Smrg * data and not cbuf0 which gallium uploads for us. 4817ec681f3Smrg */ 4827ec681f3Smrg b.cursor = nir_before_instr(instr); 4837ec681f3Smrg nir_ssa_def *offset = 4847ec681f3Smrg nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1), 4857ec681f3Smrg nir_intrinsic_base(intrin)); 4867ec681f3Smrg 4877ec681f3Smrg if (temp_const_ubo_name == NULL) 4887ec681f3Smrg temp_const_ubo_name = nir_imm_int(&b, 0); 4897ec681f3Smrg 4907ec681f3Smrg nir_intrinsic_instr *load_ubo = 4917ec681f3Smrg nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo); 4927ec681f3Smrg load_ubo->num_components = intrin->num_components; 4937ec681f3Smrg load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name); 4947ec681f3Smrg load_ubo->src[1] = nir_src_for_ssa(offset); 4957ec681f3Smrg nir_intrinsic_set_align(load_ubo, 4, 0); 4967ec681f3Smrg nir_intrinsic_set_range_base(load_ubo, 0); 4977ec681f3Smrg nir_intrinsic_set_range(load_ubo, ~0); 4987ec681f3Smrg nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest, 4997ec681f3Smrg intrin->dest.ssa.num_components, 5007ec681f3Smrg intrin->dest.ssa.bit_size, 5017ec681f3Smrg NULL); 5027ec681f3Smrg nir_builder_instr_insert(&b, &load_ubo->instr); 5037ec681f3Smrg 5047ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 5057ec681f3Smrg &load_ubo->dest.ssa); 5067ec681f3Smrg nir_instr_remove(&intrin->instr); 5077ec681f3Smrg continue; 5087ec681f3Smrg } 5097ec681f3Smrg case nir_intrinsic_load_user_clip_plane: { 5107ec681f3Smrg unsigned ucp = nir_intrinsic_ucp_id(intrin); 5117ec681f3Smrg 5127ec681f3Smrg if (ucp_idx[ucp] == -1) { 5137ec681f3Smrg ucp_idx[ucp] = num_system_values; 5147ec681f3Smrg num_system_values += 4; 5157ec681f3Smrg } 5167ec681f3Smrg 5177ec681f3Smrg for (int i = 0; i < 4; i++) { 5187ec681f3Smrg system_values[ucp_idx[ucp] + i] = 5197ec681f3Smrg BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i); 5207ec681f3Smrg } 5217ec681f3Smrg 5227ec681f3Smrg b.cursor = nir_before_instr(instr); 5237ec681f3Smrg offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t)); 5247ec681f3Smrg break; 5257ec681f3Smrg } 5267ec681f3Smrg case nir_intrinsic_load_patch_vertices_in: 5277ec681f3Smrg if (patch_vert_idx == -1) 5287ec681f3Smrg patch_vert_idx = num_system_values++; 5297ec681f3Smrg 5307ec681f3Smrg system_values[patch_vert_idx] = 5317ec681f3Smrg BRW_PARAM_BUILTIN_PATCH_VERTICES_IN; 5327ec681f3Smrg 5337ec681f3Smrg b.cursor = nir_before_instr(instr); 5347ec681f3Smrg offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t)); 5357ec681f3Smrg break; 5367ec681f3Smrg case nir_intrinsic_image_deref_load_param_intel: { 5377ec681f3Smrg assert(devinfo->ver < 9); 5387ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 5397ec681f3Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 5407ec681f3Smrg 5417ec681f3Smrg if (img_idx[var->data.binding] == -1) { 5427ec681f3Smrg /* GL only allows arrays of arrays of images. */ 5437ec681f3Smrg assert(glsl_type_is_image(glsl_without_array(var->type))); 5447ec681f3Smrg unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type)); 5457ec681f3Smrg 5467ec681f3Smrg for (int i = 0; i < num_images; i++) { 5477ec681f3Smrg const unsigned img = var->data.binding + i; 5487ec681f3Smrg 5497ec681f3Smrg img_idx[img] = num_system_values; 5507ec681f3Smrg num_system_values += BRW_IMAGE_PARAM_SIZE; 5517ec681f3Smrg 5527ec681f3Smrg uint32_t *img_sv = &system_values[img_idx[img]]; 5537ec681f3Smrg 5547ec681f3Smrg setup_vec4_image_sysval( 5557ec681f3Smrg img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img, 5567ec681f3Smrg offsetof(struct brw_image_param, offset), 2); 5577ec681f3Smrg setup_vec4_image_sysval( 5587ec681f3Smrg img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img, 5597ec681f3Smrg offsetof(struct brw_image_param, size), 3); 5607ec681f3Smrg setup_vec4_image_sysval( 5617ec681f3Smrg img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img, 5627ec681f3Smrg offsetof(struct brw_image_param, stride), 4); 5637ec681f3Smrg setup_vec4_image_sysval( 5647ec681f3Smrg img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img, 5657ec681f3Smrg offsetof(struct brw_image_param, tiling), 3); 5667ec681f3Smrg setup_vec4_image_sysval( 5677ec681f3Smrg img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img, 5687ec681f3Smrg offsetof(struct brw_image_param, swizzling), 2); 5697ec681f3Smrg } 5707ec681f3Smrg } 5717ec681f3Smrg 5727ec681f3Smrg b.cursor = nir_before_instr(instr); 5737ec681f3Smrg offset = nir_iadd(&b, 5747ec681f3Smrg get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4), 5757ec681f3Smrg nir_imm_int(&b, img_idx[var->data.binding] * 4 + 5767ec681f3Smrg nir_intrinsic_base(intrin) * 16)); 5777ec681f3Smrg break; 5787ec681f3Smrg } 5797ec681f3Smrg case nir_intrinsic_load_workgroup_size: { 5807ec681f3Smrg assert(nir->info.workgroup_size_variable); 5817ec681f3Smrg if (variable_group_size_idx == -1) { 5827ec681f3Smrg variable_group_size_idx = num_system_values; 5837ec681f3Smrg num_system_values += 3; 5847ec681f3Smrg for (int i = 0; i < 3; i++) { 5857ec681f3Smrg system_values[variable_group_size_idx + i] = 5867ec681f3Smrg BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i; 5877ec681f3Smrg } 5887ec681f3Smrg } 5897ec681f3Smrg 5907ec681f3Smrg b.cursor = nir_before_instr(instr); 5917ec681f3Smrg offset = nir_imm_int(&b, 5927ec681f3Smrg variable_group_size_idx * sizeof(uint32_t)); 5937ec681f3Smrg break; 5947ec681f3Smrg } 5957ec681f3Smrg default: 5967ec681f3Smrg continue; 5977ec681f3Smrg } 5987ec681f3Smrg 5997ec681f3Smrg unsigned comps = nir_intrinsic_dest_components(intrin); 6007ec681f3Smrg 6017ec681f3Smrg nir_intrinsic_instr *load = 6027ec681f3Smrg nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo); 6037ec681f3Smrg load->num_components = comps; 6047ec681f3Smrg load->src[0] = nir_src_for_ssa(temp_ubo_name); 6057ec681f3Smrg load->src[1] = nir_src_for_ssa(offset); 6067ec681f3Smrg nir_intrinsic_set_align(load, 4, 0); 6077ec681f3Smrg nir_intrinsic_set_range_base(load, 0); 6087ec681f3Smrg nir_intrinsic_set_range(load, ~0); 6097ec681f3Smrg nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL); 6107ec681f3Smrg nir_builder_instr_insert(&b, &load->instr); 6117ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 6127ec681f3Smrg &load->dest.ssa); 6137ec681f3Smrg nir_instr_remove(instr); 6147ec681f3Smrg } 6157ec681f3Smrg } 6167ec681f3Smrg 6177ec681f3Smrg nir_validate_shader(nir, "before remapping"); 6187ec681f3Smrg 6197ec681f3Smrg /* Uniforms are stored in constant buffer 0, the 6207ec681f3Smrg * user-facing UBOs are indexed by one. So if any constant buffer is 6217ec681f3Smrg * needed, the constant buffer 0 will be needed, so account for it. 6227ec681f3Smrg */ 6237ec681f3Smrg unsigned num_cbufs = nir->info.num_ubos; 6247ec681f3Smrg if (num_cbufs || nir->num_uniforms) 6257ec681f3Smrg num_cbufs++; 6267ec681f3Smrg 6277ec681f3Smrg /* Place the new params in a new cbuf. */ 6287ec681f3Smrg if (num_system_values > 0) { 6297ec681f3Smrg unsigned sysval_cbuf_index = num_cbufs; 6307ec681f3Smrg num_cbufs++; 6317ec681f3Smrg 6327ec681f3Smrg system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin, 6337ec681f3Smrg num_system_values); 6347ec681f3Smrg 6357ec681f3Smrg nir_foreach_block(block, impl) { 6367ec681f3Smrg nir_foreach_instr_safe(instr, block) { 6377ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 6387ec681f3Smrg continue; 6397ec681f3Smrg 6407ec681f3Smrg nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr); 6417ec681f3Smrg 6427ec681f3Smrg if (load->intrinsic != nir_intrinsic_load_ubo) 6437ec681f3Smrg continue; 6447ec681f3Smrg 6457ec681f3Smrg b.cursor = nir_before_instr(instr); 6467ec681f3Smrg 6477ec681f3Smrg assert(load->src[0].is_ssa); 6487ec681f3Smrg 6497ec681f3Smrg if (load->src[0].ssa == temp_ubo_name) { 6507ec681f3Smrg nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index); 6517ec681f3Smrg nir_instr_rewrite_src(instr, &load->src[0], 6527ec681f3Smrg nir_src_for_ssa(imm)); 6537ec681f3Smrg } 6547ec681f3Smrg } 6557ec681f3Smrg } 6567ec681f3Smrg 6577ec681f3Smrg /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */ 6587ec681f3Smrg nir_opt_constant_folding(nir); 6597ec681f3Smrg } else { 6607ec681f3Smrg ralloc_free(system_values); 6617ec681f3Smrg system_values = NULL; 6627ec681f3Smrg } 6637ec681f3Smrg 6647ec681f3Smrg assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS); 6657ec681f3Smrg nir_validate_shader(nir, "after remap"); 6667ec681f3Smrg 6677ec681f3Smrg /* We don't use params[] but gallium leaves num_uniforms set. We use this 6687ec681f3Smrg * to detect when cbuf0 exists but we don't need it anymore when we get 6697ec681f3Smrg * here. Instead, zero it out so that the back-end doesn't get confused 6707ec681f3Smrg * when nr_params * 4 != num_uniforms != nr_params * 4. 6717ec681f3Smrg */ 6727ec681f3Smrg nir->num_uniforms = 0; 6737ec681f3Smrg 6747ec681f3Smrg /* Constant loads (if any) need to go at the end of the constant buffers so 6757ec681f3Smrg * we need to know num_cbufs before we can lower to them. 6767ec681f3Smrg */ 6777ec681f3Smrg if (temp_const_ubo_name != NULL) { 6787ec681f3Smrg nir_load_const_instr *const_ubo_index = 6797ec681f3Smrg nir_instr_as_load_const(temp_const_ubo_name->parent_instr); 6807ec681f3Smrg assert(const_ubo_index->def.bit_size == 32); 6817ec681f3Smrg const_ubo_index->value[0].u32 = num_cbufs; 6827ec681f3Smrg } 6837ec681f3Smrg 6847ec681f3Smrg *out_system_values = system_values; 6857ec681f3Smrg *out_num_system_values = num_system_values; 6867ec681f3Smrg *out_num_cbufs = num_cbufs; 6877ec681f3Smrg} 6887ec681f3Smrg 6897ec681f3Smrgstatic const char *surface_group_names[] = { 6907ec681f3Smrg [CROCUS_SURFACE_GROUP_RENDER_TARGET] = "render target", 6917ec681f3Smrg [CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read", 6927ec681f3Smrg [CROCUS_SURFACE_GROUP_SOL] = "streamout", 6937ec681f3Smrg [CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups", 6947ec681f3Smrg [CROCUS_SURFACE_GROUP_TEXTURE] = "texture", 6957ec681f3Smrg [CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = "texture gather", 6967ec681f3Smrg [CROCUS_SURFACE_GROUP_UBO] = "ubo", 6977ec681f3Smrg [CROCUS_SURFACE_GROUP_SSBO] = "ssbo", 6987ec681f3Smrg [CROCUS_SURFACE_GROUP_IMAGE] = "image", 6997ec681f3Smrg}; 7007ec681f3Smrg 7017ec681f3Smrgstatic void 7027ec681f3Smrgcrocus_print_binding_table(FILE *fp, const char *name, 7037ec681f3Smrg const struct crocus_binding_table *bt) 7047ec681f3Smrg{ 7057ec681f3Smrg STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == CROCUS_SURFACE_GROUP_COUNT); 7067ec681f3Smrg 7077ec681f3Smrg uint32_t total = 0; 7087ec681f3Smrg uint32_t compacted = 0; 7097ec681f3Smrg 7107ec681f3Smrg for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) { 7117ec681f3Smrg uint32_t size = bt->sizes[i]; 7127ec681f3Smrg total += size; 7137ec681f3Smrg if (size) 7147ec681f3Smrg compacted += util_bitcount64(bt->used_mask[i]); 7157ec681f3Smrg } 7167ec681f3Smrg 7177ec681f3Smrg if (total == 0) { 7187ec681f3Smrg fprintf(fp, "Binding table for %s is empty\n\n", name); 7197ec681f3Smrg return; 7207ec681f3Smrg } 7217ec681f3Smrg 7227ec681f3Smrg if (total != compacted) { 7237ec681f3Smrg fprintf(fp, "Binding table for %s " 7247ec681f3Smrg "(compacted to %u entries from %u entries)\n", 7257ec681f3Smrg name, compacted, total); 7267ec681f3Smrg } else { 7277ec681f3Smrg fprintf(fp, "Binding table for %s (%u entries)\n", name, total); 7287ec681f3Smrg } 7297ec681f3Smrg 7307ec681f3Smrg uint32_t entry = 0; 7317ec681f3Smrg for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) { 7327ec681f3Smrg uint64_t mask = bt->used_mask[i]; 7337ec681f3Smrg while (mask) { 7347ec681f3Smrg int index = u_bit_scan64(&mask); 7357ec681f3Smrg fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index); 7367ec681f3Smrg } 7377ec681f3Smrg } 7387ec681f3Smrg fprintf(fp, "\n"); 7397ec681f3Smrg} 7407ec681f3Smrg 7417ec681f3Smrgenum { 7427ec681f3Smrg /* Max elements in a surface group. */ 7437ec681f3Smrg SURFACE_GROUP_MAX_ELEMENTS = 64, 7447ec681f3Smrg}; 7457ec681f3Smrg 7467ec681f3Smrgstatic void 7477ec681f3Smrgrewrite_src_with_bti(nir_builder *b, struct crocus_binding_table *bt, 7487ec681f3Smrg nir_instr *instr, nir_src *src, 7497ec681f3Smrg enum crocus_surface_group group) 7507ec681f3Smrg{ 7517ec681f3Smrg assert(bt->sizes[group] > 0); 7527ec681f3Smrg 7537ec681f3Smrg b->cursor = nir_before_instr(instr); 7547ec681f3Smrg nir_ssa_def *bti; 7557ec681f3Smrg if (nir_src_is_const(*src)) { 7567ec681f3Smrg uint32_t index = nir_src_as_uint(*src); 7577ec681f3Smrg bti = nir_imm_intN_t(b, crocus_group_index_to_bti(bt, group, index), 7587ec681f3Smrg src->ssa->bit_size); 7597ec681f3Smrg } else { 7607ec681f3Smrg /* Indirect usage makes all the surfaces of the group to be available, 7617ec681f3Smrg * so we can just add the base. 7627ec681f3Smrg */ 7637ec681f3Smrg assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group])); 7647ec681f3Smrg bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]); 7657ec681f3Smrg } 7667ec681f3Smrg nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti)); 7677ec681f3Smrg} 7687ec681f3Smrg 7697ec681f3Smrgstatic void 7707ec681f3Smrgmark_used_with_src(struct crocus_binding_table *bt, nir_src *src, 7717ec681f3Smrg enum crocus_surface_group group) 7727ec681f3Smrg{ 7737ec681f3Smrg assert(bt->sizes[group] > 0); 7747ec681f3Smrg 7757ec681f3Smrg if (nir_src_is_const(*src)) { 7767ec681f3Smrg uint64_t index = nir_src_as_uint(*src); 7777ec681f3Smrg assert(index < bt->sizes[group]); 7787ec681f3Smrg bt->used_mask[group] |= 1ull << index; 7797ec681f3Smrg } else { 7807ec681f3Smrg /* There's an indirect usage, we need all the surfaces. */ 7817ec681f3Smrg bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]); 7827ec681f3Smrg } 7837ec681f3Smrg} 7847ec681f3Smrg 7857ec681f3Smrgstatic bool 7867ec681f3Smrgskip_compacting_binding_tables(void) 7877ec681f3Smrg{ 7887ec681f3Smrg static int skip = -1; 7897ec681f3Smrg if (skip < 0) 7907ec681f3Smrg skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false); 7917ec681f3Smrg return skip; 7927ec681f3Smrg} 7937ec681f3Smrg 7947ec681f3Smrg/** 7957ec681f3Smrg * Set up the binding table indices and apply to the shader. 7967ec681f3Smrg */ 7977ec681f3Smrgstatic void 7987ec681f3Smrgcrocus_setup_binding_table(const struct intel_device_info *devinfo, 7997ec681f3Smrg struct nir_shader *nir, 8007ec681f3Smrg struct crocus_binding_table *bt, 8017ec681f3Smrg unsigned num_render_targets, 8027ec681f3Smrg unsigned num_system_values, 8037ec681f3Smrg unsigned num_cbufs, 8047ec681f3Smrg const struct brw_sampler_prog_key_data *key) 8057ec681f3Smrg{ 8067ec681f3Smrg const struct shader_info *info = &nir->info; 8077ec681f3Smrg 8087ec681f3Smrg memset(bt, 0, sizeof(*bt)); 8097ec681f3Smrg 8107ec681f3Smrg /* Set the sizes for each surface group. For some groups, we already know 8117ec681f3Smrg * upfront how many will be used, so mark them. 8127ec681f3Smrg */ 8137ec681f3Smrg if (info->stage == MESA_SHADER_FRAGMENT) { 8147ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets; 8157ec681f3Smrg /* All render targets used. */ 8167ec681f3Smrg bt->used_mask[CROCUS_SURFACE_GROUP_RENDER_TARGET] = 8177ec681f3Smrg BITFIELD64_MASK(num_render_targets); 8187ec681f3Smrg 8197ec681f3Smrg /* Setup render target read surface group in order to support non-coherent 8207ec681f3Smrg * framebuffer fetch on Gfx7 8217ec681f3Smrg */ 8227ec681f3Smrg if (devinfo->ver >= 6 && info->outputs_read) { 8237ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets; 8247ec681f3Smrg bt->used_mask[CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] = 8257ec681f3Smrg BITFIELD64_MASK(num_render_targets); 8267ec681f3Smrg } 8277ec681f3Smrg } else if (info->stage == MESA_SHADER_COMPUTE) { 8287ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = 1; 8297ec681f3Smrg } else if (info->stage == MESA_SHADER_GEOMETRY) { 8307ec681f3Smrg /* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform 8317ec681f3Smrg * feedback surfaces. 8327ec681f3Smrg */ 8337ec681f3Smrg if (devinfo->ver == 6) { 8347ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_SOL] = BRW_MAX_SOL_BINDINGS; 8357ec681f3Smrg bt->used_mask[CROCUS_SURFACE_GROUP_SOL] = (uint64_t)-1; 8367ec681f3Smrg } 8377ec681f3Smrg } 8387ec681f3Smrg 8397ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used); 8407ec681f3Smrg bt->used_mask[CROCUS_SURFACE_GROUP_TEXTURE] = info->textures_used[0]; 8417ec681f3Smrg 8427ec681f3Smrg if (info->uses_texture_gather && devinfo->ver < 8) { 8437ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = BITSET_LAST_BIT(info->textures_used); 8447ec681f3Smrg bt->used_mask[CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = info->textures_used[0]; 8457ec681f3Smrg } 8467ec681f3Smrg 8477ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_IMAGE] = info->num_images; 8487ec681f3Smrg 8497ec681f3Smrg /* Allocate an extra slot in the UBO section for NIR constants. 8507ec681f3Smrg * Binding table compaction will remove it if unnecessary. 8517ec681f3Smrg * 8527ec681f3Smrg * We don't include them in crocus_compiled_shader::num_cbufs because 8537ec681f3Smrg * they are uploaded separately from shs->constbufs[], but from a shader 8547ec681f3Smrg * point of view, they're another UBO (at the end of the section). 8557ec681f3Smrg */ 8567ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_UBO] = num_cbufs + 1; 8577ec681f3Smrg 8587ec681f3Smrg bt->sizes[CROCUS_SURFACE_GROUP_SSBO] = info->num_ssbos; 8597ec681f3Smrg 8607ec681f3Smrg for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) 8617ec681f3Smrg assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS); 8627ec681f3Smrg 8637ec681f3Smrg /* Mark surfaces used for the cases we don't have the information available 8647ec681f3Smrg * upfront. 8657ec681f3Smrg */ 8667ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 8677ec681f3Smrg nir_foreach_block (block, impl) { 8687ec681f3Smrg nir_foreach_instr (instr, block) { 8697ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 8707ec681f3Smrg continue; 8717ec681f3Smrg 8727ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 8737ec681f3Smrg switch (intrin->intrinsic) { 8747ec681f3Smrg case nir_intrinsic_load_num_workgroups: 8757ec681f3Smrg bt->used_mask[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = 1; 8767ec681f3Smrg break; 8777ec681f3Smrg 8787ec681f3Smrg case nir_intrinsic_load_output: 8797ec681f3Smrg if (devinfo->ver >= 6) { 8807ec681f3Smrg mark_used_with_src(bt, &intrin->src[0], 8817ec681f3Smrg CROCUS_SURFACE_GROUP_RENDER_TARGET_READ); 8827ec681f3Smrg } 8837ec681f3Smrg break; 8847ec681f3Smrg 8857ec681f3Smrg case nir_intrinsic_image_size: 8867ec681f3Smrg case nir_intrinsic_image_load: 8877ec681f3Smrg case nir_intrinsic_image_store: 8887ec681f3Smrg case nir_intrinsic_image_atomic_add: 8897ec681f3Smrg case nir_intrinsic_image_atomic_imin: 8907ec681f3Smrg case nir_intrinsic_image_atomic_umin: 8917ec681f3Smrg case nir_intrinsic_image_atomic_imax: 8927ec681f3Smrg case nir_intrinsic_image_atomic_umax: 8937ec681f3Smrg case nir_intrinsic_image_atomic_and: 8947ec681f3Smrg case nir_intrinsic_image_atomic_or: 8957ec681f3Smrg case nir_intrinsic_image_atomic_xor: 8967ec681f3Smrg case nir_intrinsic_image_atomic_exchange: 8977ec681f3Smrg case nir_intrinsic_image_atomic_comp_swap: 8987ec681f3Smrg case nir_intrinsic_image_load_raw_intel: 8997ec681f3Smrg case nir_intrinsic_image_store_raw_intel: 9007ec681f3Smrg mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_IMAGE); 9017ec681f3Smrg break; 9027ec681f3Smrg 9037ec681f3Smrg case nir_intrinsic_load_ubo: 9047ec681f3Smrg mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_UBO); 9057ec681f3Smrg break; 9067ec681f3Smrg 9077ec681f3Smrg case nir_intrinsic_store_ssbo: 9087ec681f3Smrg mark_used_with_src(bt, &intrin->src[1], CROCUS_SURFACE_GROUP_SSBO); 9097ec681f3Smrg break; 9107ec681f3Smrg 9117ec681f3Smrg case nir_intrinsic_get_ssbo_size: 9127ec681f3Smrg case nir_intrinsic_ssbo_atomic_add: 9137ec681f3Smrg case nir_intrinsic_ssbo_atomic_imin: 9147ec681f3Smrg case nir_intrinsic_ssbo_atomic_umin: 9157ec681f3Smrg case nir_intrinsic_ssbo_atomic_imax: 9167ec681f3Smrg case nir_intrinsic_ssbo_atomic_umax: 9177ec681f3Smrg case nir_intrinsic_ssbo_atomic_and: 9187ec681f3Smrg case nir_intrinsic_ssbo_atomic_or: 9197ec681f3Smrg case nir_intrinsic_ssbo_atomic_xor: 9207ec681f3Smrg case nir_intrinsic_ssbo_atomic_exchange: 9217ec681f3Smrg case nir_intrinsic_ssbo_atomic_comp_swap: 9227ec681f3Smrg case nir_intrinsic_ssbo_atomic_fmin: 9237ec681f3Smrg case nir_intrinsic_ssbo_atomic_fmax: 9247ec681f3Smrg case nir_intrinsic_ssbo_atomic_fcomp_swap: 9257ec681f3Smrg case nir_intrinsic_load_ssbo: 9267ec681f3Smrg mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_SSBO); 9277ec681f3Smrg break; 9287ec681f3Smrg 9297ec681f3Smrg default: 9307ec681f3Smrg break; 9317ec681f3Smrg } 9327ec681f3Smrg } 9337ec681f3Smrg } 9347ec681f3Smrg 9357ec681f3Smrg /* When disable we just mark everything as used. */ 9367ec681f3Smrg if (unlikely(skip_compacting_binding_tables())) { 9377ec681f3Smrg for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) 9387ec681f3Smrg bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]); 9397ec681f3Smrg } 9407ec681f3Smrg 9417ec681f3Smrg /* Calculate the offsets and the binding table size based on the used 9427ec681f3Smrg * surfaces. After this point, the functions to go between "group indices" 9437ec681f3Smrg * and binding table indices can be used. 9447ec681f3Smrg */ 9457ec681f3Smrg uint32_t next = 0; 9467ec681f3Smrg for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) { 9477ec681f3Smrg if (bt->used_mask[i] != 0) { 9487ec681f3Smrg bt->offsets[i] = next; 9497ec681f3Smrg next += util_bitcount64(bt->used_mask[i]); 9507ec681f3Smrg } 9517ec681f3Smrg } 9527ec681f3Smrg bt->size_bytes = next * 4; 9537ec681f3Smrg 9547ec681f3Smrg if (INTEL_DEBUG(DEBUG_BT)) { 9557ec681f3Smrg crocus_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt); 9567ec681f3Smrg } 9577ec681f3Smrg 9587ec681f3Smrg /* Apply the binding table indices. The backend compiler is not expected 9597ec681f3Smrg * to change those, as we haven't set any of the *_start entries in brw 9607ec681f3Smrg * binding_table. 9617ec681f3Smrg */ 9627ec681f3Smrg nir_builder b; 9637ec681f3Smrg nir_builder_init(&b, impl); 9647ec681f3Smrg 9657ec681f3Smrg nir_foreach_block (block, impl) { 9667ec681f3Smrg nir_foreach_instr (instr, block) { 9677ec681f3Smrg if (instr->type == nir_instr_type_tex) { 9687ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(instr); 9697ec681f3Smrg bool is_gather = devinfo->ver < 8 && tex->op == nir_texop_tg4; 9707ec681f3Smrg 9717ec681f3Smrg /* rewrite the tg4 component from green to blue before replacing the 9727ec681f3Smrg texture index */ 9737ec681f3Smrg if (devinfo->verx10 == 70) { 9747ec681f3Smrg if (tex->component == 1) 9757ec681f3Smrg if (key->gather_channel_quirk_mask & (1 << tex->texture_index)) 9767ec681f3Smrg tex->component = 2; 9777ec681f3Smrg } 9787ec681f3Smrg 9797ec681f3Smrg if (is_gather && devinfo->ver == 6 && key->gfx6_gather_wa[tex->texture_index]) { 9807ec681f3Smrg b.cursor = nir_after_instr(instr); 9817ec681f3Smrg enum gfx6_gather_sampler_wa wa = key->gfx6_gather_wa[tex->texture_index]; 9827ec681f3Smrg int width = (wa & WA_8BIT) ? 8 : 16; 9837ec681f3Smrg 9847ec681f3Smrg nir_ssa_def *val = nir_fmul_imm(&b, &tex->dest.ssa, (1 << width) - 1); 9857ec681f3Smrg val = nir_f2u32(&b, val); 9867ec681f3Smrg if (wa & WA_SIGN) { 9877ec681f3Smrg val = nir_ishl(&b, val, nir_imm_int(&b, 32 - width)); 9887ec681f3Smrg val = nir_ishr(&b, val, nir_imm_int(&b, 32 - width)); 9897ec681f3Smrg } 9907ec681f3Smrg nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, val, val->parent_instr); 9917ec681f3Smrg } 9927ec681f3Smrg 9937ec681f3Smrg tex->texture_index = 9947ec681f3Smrg crocus_group_index_to_bti(bt, is_gather ? CROCUS_SURFACE_GROUP_TEXTURE_GATHER : CROCUS_SURFACE_GROUP_TEXTURE, 9957ec681f3Smrg tex->texture_index); 9967ec681f3Smrg continue; 9977ec681f3Smrg } 9987ec681f3Smrg 9997ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 10007ec681f3Smrg continue; 10017ec681f3Smrg 10027ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 10037ec681f3Smrg switch (intrin->intrinsic) { 10047ec681f3Smrg case nir_intrinsic_image_size: 10057ec681f3Smrg case nir_intrinsic_image_load: 10067ec681f3Smrg case nir_intrinsic_image_store: 10077ec681f3Smrg case nir_intrinsic_image_atomic_add: 10087ec681f3Smrg case nir_intrinsic_image_atomic_imin: 10097ec681f3Smrg case nir_intrinsic_image_atomic_umin: 10107ec681f3Smrg case nir_intrinsic_image_atomic_imax: 10117ec681f3Smrg case nir_intrinsic_image_atomic_umax: 10127ec681f3Smrg case nir_intrinsic_image_atomic_and: 10137ec681f3Smrg case nir_intrinsic_image_atomic_or: 10147ec681f3Smrg case nir_intrinsic_image_atomic_xor: 10157ec681f3Smrg case nir_intrinsic_image_atomic_exchange: 10167ec681f3Smrg case nir_intrinsic_image_atomic_comp_swap: 10177ec681f3Smrg case nir_intrinsic_image_load_raw_intel: 10187ec681f3Smrg case nir_intrinsic_image_store_raw_intel: 10197ec681f3Smrg rewrite_src_with_bti(&b, bt, instr, &intrin->src[0], 10207ec681f3Smrg CROCUS_SURFACE_GROUP_IMAGE); 10217ec681f3Smrg break; 10227ec681f3Smrg 10237ec681f3Smrg case nir_intrinsic_load_ubo: 10247ec681f3Smrg rewrite_src_with_bti(&b, bt, instr, &intrin->src[0], 10257ec681f3Smrg CROCUS_SURFACE_GROUP_UBO); 10267ec681f3Smrg break; 10277ec681f3Smrg 10287ec681f3Smrg case nir_intrinsic_store_ssbo: 10297ec681f3Smrg rewrite_src_with_bti(&b, bt, instr, &intrin->src[1], 10307ec681f3Smrg CROCUS_SURFACE_GROUP_SSBO); 10317ec681f3Smrg break; 10327ec681f3Smrg 10337ec681f3Smrg case nir_intrinsic_load_output: 10347ec681f3Smrg if (devinfo->ver >= 6) { 10357ec681f3Smrg rewrite_src_with_bti(&b, bt, instr, &intrin->src[0], 10367ec681f3Smrg CROCUS_SURFACE_GROUP_RENDER_TARGET_READ); 10377ec681f3Smrg } 10387ec681f3Smrg break; 10397ec681f3Smrg 10407ec681f3Smrg case nir_intrinsic_get_ssbo_size: 10417ec681f3Smrg case nir_intrinsic_ssbo_atomic_add: 10427ec681f3Smrg case nir_intrinsic_ssbo_atomic_imin: 10437ec681f3Smrg case nir_intrinsic_ssbo_atomic_umin: 10447ec681f3Smrg case nir_intrinsic_ssbo_atomic_imax: 10457ec681f3Smrg case nir_intrinsic_ssbo_atomic_umax: 10467ec681f3Smrg case nir_intrinsic_ssbo_atomic_and: 10477ec681f3Smrg case nir_intrinsic_ssbo_atomic_or: 10487ec681f3Smrg case nir_intrinsic_ssbo_atomic_xor: 10497ec681f3Smrg case nir_intrinsic_ssbo_atomic_exchange: 10507ec681f3Smrg case nir_intrinsic_ssbo_atomic_comp_swap: 10517ec681f3Smrg case nir_intrinsic_ssbo_atomic_fmin: 10527ec681f3Smrg case nir_intrinsic_ssbo_atomic_fmax: 10537ec681f3Smrg case nir_intrinsic_ssbo_atomic_fcomp_swap: 10547ec681f3Smrg case nir_intrinsic_load_ssbo: 10557ec681f3Smrg rewrite_src_with_bti(&b, bt, instr, &intrin->src[0], 10567ec681f3Smrg CROCUS_SURFACE_GROUP_SSBO); 10577ec681f3Smrg break; 10587ec681f3Smrg 10597ec681f3Smrg default: 10607ec681f3Smrg break; 10617ec681f3Smrg } 10627ec681f3Smrg } 10637ec681f3Smrg } 10647ec681f3Smrg} 10657ec681f3Smrg 10667ec681f3Smrgstatic void 10677ec681f3Smrgcrocus_debug_recompile(struct crocus_context *ice, 10687ec681f3Smrg struct shader_info *info, 10697ec681f3Smrg const struct brw_base_prog_key *key) 10707ec681f3Smrg{ 10717ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen; 10727ec681f3Smrg const struct brw_compiler *c = screen->compiler; 10737ec681f3Smrg 10747ec681f3Smrg if (!info) 10757ec681f3Smrg return; 10767ec681f3Smrg 10777ec681f3Smrg brw_shader_perf_log(c, &ice->dbg, "Recompiling %s shader for program %s: %s\n", 10787ec681f3Smrg _mesa_shader_stage_to_string(info->stage), 10797ec681f3Smrg info->name ? info->name : "(no identifier)", 10807ec681f3Smrg info->label ? info->label : ""); 10817ec681f3Smrg 10827ec681f3Smrg const void *old_key = 10837ec681f3Smrg crocus_find_previous_compile(ice, info->stage, key->program_string_id); 10847ec681f3Smrg 10857ec681f3Smrg brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key); 10867ec681f3Smrg} 10877ec681f3Smrg 10887ec681f3Smrg/** 10897ec681f3Smrg * Get the shader for the last enabled geometry stage. 10907ec681f3Smrg * 10917ec681f3Smrg * This stage is the one which will feed stream output and the rasterizer. 10927ec681f3Smrg */ 10937ec681f3Smrgstatic gl_shader_stage 10947ec681f3Smrglast_vue_stage(struct crocus_context *ice) 10957ec681f3Smrg{ 10967ec681f3Smrg if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) 10977ec681f3Smrg return MESA_SHADER_GEOMETRY; 10987ec681f3Smrg 10997ec681f3Smrg if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) 11007ec681f3Smrg return MESA_SHADER_TESS_EVAL; 11017ec681f3Smrg 11027ec681f3Smrg return MESA_SHADER_VERTEX; 11037ec681f3Smrg} 11047ec681f3Smrg 11057ec681f3Smrgstatic GLbitfield64 11067ec681f3Smrgcrocus_vs_outputs_written(struct crocus_context *ice, 11077ec681f3Smrg const struct brw_vs_prog_key *key, 11087ec681f3Smrg GLbitfield64 user_varyings) 11097ec681f3Smrg{ 11107ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 11117ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 11127ec681f3Smrg GLbitfield64 outputs_written = user_varyings; 11137ec681f3Smrg 11147ec681f3Smrg if (devinfo->ver < 6) { 11157ec681f3Smrg 11167ec681f3Smrg if (key->copy_edgeflag) 11177ec681f3Smrg outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); 11187ec681f3Smrg 11197ec681f3Smrg /* Put dummy slots into the VUE for the SF to put the replaced 11207ec681f3Smrg * point sprite coords in. We shouldn't need these dummy slots, 11217ec681f3Smrg * which take up precious URB space, but it would mean that the SF 11227ec681f3Smrg * doesn't get nice aligned pairs of input coords into output 11237ec681f3Smrg * coords, which would be a pain to handle. 11247ec681f3Smrg */ 11257ec681f3Smrg for (unsigned i = 0; i < 8; i++) { 11267ec681f3Smrg if (key->point_coord_replace & (1 << i)) 11277ec681f3Smrg outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); 11287ec681f3Smrg } 11297ec681f3Smrg 11307ec681f3Smrg /* if back colors are written, allocate slots for front colors too */ 11317ec681f3Smrg if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) 11327ec681f3Smrg outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); 11337ec681f3Smrg if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) 11347ec681f3Smrg outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); 11357ec681f3Smrg } 11367ec681f3Smrg 11377ec681f3Smrg /* In order for legacy clipping to work, we need to populate the clip 11387ec681f3Smrg * distance varying slots whenever clipping is enabled, even if the vertex 11397ec681f3Smrg * shader doesn't write to gl_ClipDistance. 11407ec681f3Smrg */ 11417ec681f3Smrg if (key->nr_userclip_plane_consts > 0) { 11427ec681f3Smrg outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); 11437ec681f3Smrg outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); 11447ec681f3Smrg } 11457ec681f3Smrg 11467ec681f3Smrg return outputs_written; 11477ec681f3Smrg} 11487ec681f3Smrg 11497ec681f3Smrg/* 11507ec681f3Smrg * If no edgeflags come from the user, gen4/5 11517ec681f3Smrg * require giving the clip shader a default edgeflag. 11527ec681f3Smrg * 11537ec681f3Smrg * This will always be 1.0. 11547ec681f3Smrg */ 11557ec681f3Smrgstatic void 11567ec681f3Smrgcrocus_lower_default_edgeflags(struct nir_shader *nir) 11577ec681f3Smrg{ 11587ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 11597ec681f3Smrg 11607ec681f3Smrg nir_builder b; 11617ec681f3Smrg nir_builder_init(&b, impl); 11627ec681f3Smrg 11637ec681f3Smrg b.cursor = nir_after_cf_list(&b.impl->body); 11647ec681f3Smrg nir_variable *var = nir_variable_create(nir, nir_var_shader_out, 11657ec681f3Smrg glsl_float_type(), 11667ec681f3Smrg "edgeflag"); 11677ec681f3Smrg var->data.location = VARYING_SLOT_EDGE; 11687ec681f3Smrg nir_store_var(&b, var, nir_imm_float(&b, 1.0), 0x1); 11697ec681f3Smrg} 11707ec681f3Smrg 11717ec681f3Smrg/** 11727ec681f3Smrg * Compile a vertex shader, and upload the assembly. 11737ec681f3Smrg */ 11747ec681f3Smrgstatic struct crocus_compiled_shader * 11757ec681f3Smrgcrocus_compile_vs(struct crocus_context *ice, 11767ec681f3Smrg struct crocus_uncompiled_shader *ish, 11777ec681f3Smrg const struct brw_vs_prog_key *key) 11787ec681f3Smrg{ 11797ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 11807ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 11817ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 11827ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 11837ec681f3Smrg struct brw_vs_prog_data *vs_prog_data = 11847ec681f3Smrg rzalloc(mem_ctx, struct brw_vs_prog_data); 11857ec681f3Smrg struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base; 11867ec681f3Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 11877ec681f3Smrg enum brw_param_builtin *system_values; 11887ec681f3Smrg unsigned num_system_values; 11897ec681f3Smrg unsigned num_cbufs; 11907ec681f3Smrg 11917ec681f3Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 11927ec681f3Smrg 11937ec681f3Smrg if (key->nr_userclip_plane_consts) { 11947ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 11957ec681f3Smrg nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true, 11967ec681f3Smrg false, NULL); 11977ec681f3Smrg nir_lower_io_to_temporaries(nir, impl, true, false); 11987ec681f3Smrg nir_lower_global_vars_to_local(nir); 11997ec681f3Smrg nir_lower_vars_to_ssa(nir); 12007ec681f3Smrg nir_shader_gather_info(nir, impl); 12017ec681f3Smrg } 12027ec681f3Smrg 12037ec681f3Smrg prog_data->use_alt_mode = nir->info.is_arb_asm; 12047ec681f3Smrg 12057ec681f3Smrg crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 12067ec681f3Smrg &num_system_values, &num_cbufs); 12077ec681f3Smrg 12087ec681f3Smrg crocus_lower_swizzles(nir, &key->base.tex); 12097ec681f3Smrg 12107ec681f3Smrg if (devinfo->ver <= 5 && 12117ec681f3Smrg !(nir->info.inputs_read & BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG))) 12127ec681f3Smrg crocus_lower_default_edgeflags(nir); 12137ec681f3Smrg 12147ec681f3Smrg struct crocus_binding_table bt; 12157ec681f3Smrg crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, 12167ec681f3Smrg num_system_values, num_cbufs, &key->base.tex); 12177ec681f3Smrg 12187ec681f3Smrg if (can_push_ubo(devinfo)) 12197ec681f3Smrg brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); 12207ec681f3Smrg 12217ec681f3Smrg uint64_t outputs_written = 12227ec681f3Smrg crocus_vs_outputs_written(ice, key, nir->info.outputs_written); 12237ec681f3Smrg brw_compute_vue_map(devinfo, 12247ec681f3Smrg &vue_prog_data->vue_map, outputs_written, 12257ec681f3Smrg nir->info.separate_shader, /* pos slots */ 1); 12267ec681f3Smrg 12277ec681f3Smrg /* Don't tell the backend about our clip plane constants, we've already 12287ec681f3Smrg * lowered them in NIR and we don't want it doing it again. 12297ec681f3Smrg */ 12307ec681f3Smrg struct brw_vs_prog_key key_no_ucp = *key; 12317ec681f3Smrg key_no_ucp.nr_userclip_plane_consts = 0; 12327ec681f3Smrg key_no_ucp.copy_edgeflag = false; 12337ec681f3Smrg crocus_sanitize_tex_key(&key_no_ucp.base.tex); 12347ec681f3Smrg 12357ec681f3Smrg struct brw_compile_vs_params params = { 12367ec681f3Smrg .nir = nir, 12377ec681f3Smrg .key = &key_no_ucp, 12387ec681f3Smrg .prog_data = vs_prog_data, 12397ec681f3Smrg .edgeflag_is_last = devinfo->ver < 6, 12407ec681f3Smrg .log_data = &ice->dbg, 12417ec681f3Smrg }; 12427ec681f3Smrg const unsigned *program = 12437ec681f3Smrg brw_compile_vs(compiler, mem_ctx, ¶ms); 12447ec681f3Smrg if (program == NULL) { 12457ec681f3Smrg dbg_printf("Failed to compile vertex shader: %s\n", params.error_str); 12467ec681f3Smrg ralloc_free(mem_ctx); 12477ec681f3Smrg return false; 12487ec681f3Smrg } 12497ec681f3Smrg 12507ec681f3Smrg if (ish->compiled_once) { 12517ec681f3Smrg crocus_debug_recompile(ice, &nir->info, &key->base); 12527ec681f3Smrg } else { 12537ec681f3Smrg ish->compiled_once = true; 12547ec681f3Smrg } 12557ec681f3Smrg 12567ec681f3Smrg uint32_t *so_decls = NULL; 12577ec681f3Smrg if (devinfo->ver > 6) 12587ec681f3Smrg so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output, 12597ec681f3Smrg &vue_prog_data->vue_map); 12607ec681f3Smrg 12617ec681f3Smrg struct crocus_compiled_shader *shader = 12627ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_VS, sizeof(*key), key, program, 12637ec681f3Smrg prog_data->program_size, 12647ec681f3Smrg prog_data, sizeof(*vs_prog_data), so_decls, 12657ec681f3Smrg system_values, num_system_values, 12667ec681f3Smrg num_cbufs, &bt); 12677ec681f3Smrg 12687ec681f3Smrg crocus_disk_cache_store(screen->disk_cache, ish, shader, 12697ec681f3Smrg ice->shaders.cache_bo_map, 12707ec681f3Smrg key, sizeof(*key)); 12717ec681f3Smrg 12727ec681f3Smrg ralloc_free(mem_ctx); 12737ec681f3Smrg return shader; 12747ec681f3Smrg} 12757ec681f3Smrg 12767ec681f3Smrg/** 12777ec681f3Smrg * Update the current vertex shader variant. 12787ec681f3Smrg * 12797ec681f3Smrg * Fill out the key, look in the cache, compile and bind if needed. 12807ec681f3Smrg */ 12817ec681f3Smrgstatic void 12827ec681f3Smrgcrocus_update_compiled_vs(struct crocus_context *ice) 12837ec681f3Smrg{ 12847ec681f3Smrg struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX]; 12857ec681f3Smrg struct crocus_uncompiled_shader *ish = 12867ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_VERTEX]; 12877ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 12887ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 12897ec681f3Smrg struct brw_vs_prog_key key = { KEY_INIT() }; 12907ec681f3Smrg 12917ec681f3Smrg if (ish->nos & (1ull << CROCUS_NOS_TEXTURES)) 12927ec681f3Smrg crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_VERTEX, ish, 12937ec681f3Smrg ish->nir->info.uses_texture_gather, &key.base.tex); 12947ec681f3Smrg screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key); 12957ec681f3Smrg 12967ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_VS]; 12977ec681f3Smrg struct crocus_compiled_shader *shader = 12987ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_VS, sizeof(key), &key); 12997ec681f3Smrg 13007ec681f3Smrg if (!shader) 13017ec681f3Smrg shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)); 13027ec681f3Smrg 13037ec681f3Smrg if (!shader) 13047ec681f3Smrg shader = crocus_compile_vs(ice, ish, &key); 13057ec681f3Smrg 13067ec681f3Smrg if (old != shader) { 13077ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_VS] = shader; 13087ec681f3Smrg if (devinfo->ver == 8) 13097ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS; 13107ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS | 13117ec681f3Smrg CROCUS_STAGE_DIRTY_BINDINGS_VS | 13127ec681f3Smrg CROCUS_STAGE_DIRTY_CONSTANTS_VS; 13137ec681f3Smrg shs->sysvals_need_upload = true; 13147ec681f3Smrg 13157ec681f3Smrg const struct brw_vs_prog_data *vs_prog_data = 13167ec681f3Smrg (void *) shader->prog_data; 13177ec681f3Smrg const bool uses_draw_params = vs_prog_data->uses_firstvertex || 13187ec681f3Smrg vs_prog_data->uses_baseinstance; 13197ec681f3Smrg const bool uses_derived_draw_params = vs_prog_data->uses_drawid || 13207ec681f3Smrg vs_prog_data->uses_is_indexed_draw; 13217ec681f3Smrg const bool needs_sgvs_element = uses_draw_params || 13227ec681f3Smrg vs_prog_data->uses_instanceid || 13237ec681f3Smrg vs_prog_data->uses_vertexid; 13247ec681f3Smrg 13257ec681f3Smrg if (ice->state.vs_uses_draw_params != uses_draw_params || 13267ec681f3Smrg ice->state.vs_uses_derived_draw_params != uses_derived_draw_params || 13277ec681f3Smrg ice->state.vs_needs_edge_flag != ish->needs_edge_flag || 13287ec681f3Smrg ice->state.vs_uses_vertexid != vs_prog_data->uses_vertexid || 13297ec681f3Smrg ice->state.vs_uses_instanceid != vs_prog_data->uses_instanceid) { 13307ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS | 13317ec681f3Smrg CROCUS_DIRTY_VERTEX_ELEMENTS; 13327ec681f3Smrg } 13337ec681f3Smrg ice->state.vs_uses_draw_params = uses_draw_params; 13347ec681f3Smrg ice->state.vs_uses_derived_draw_params = uses_derived_draw_params; 13357ec681f3Smrg ice->state.vs_needs_sgvs_element = needs_sgvs_element; 13367ec681f3Smrg ice->state.vs_needs_edge_flag = ish->needs_edge_flag; 13377ec681f3Smrg ice->state.vs_uses_vertexid = vs_prog_data->uses_vertexid; 13387ec681f3Smrg ice->state.vs_uses_instanceid = vs_prog_data->uses_instanceid; 13397ec681f3Smrg } 13407ec681f3Smrg} 13417ec681f3Smrg 13427ec681f3Smrg/** 13437ec681f3Smrg * Get the shader_info for a given stage, or NULL if the stage is disabled. 13447ec681f3Smrg */ 13457ec681f3Smrgconst struct shader_info * 13467ec681f3Smrgcrocus_get_shader_info(const struct crocus_context *ice, gl_shader_stage stage) 13477ec681f3Smrg{ 13487ec681f3Smrg const struct crocus_uncompiled_shader *ish = ice->shaders.uncompiled[stage]; 13497ec681f3Smrg 13507ec681f3Smrg if (!ish) 13517ec681f3Smrg return NULL; 13527ec681f3Smrg 13537ec681f3Smrg const nir_shader *nir = ish->nir; 13547ec681f3Smrg return &nir->info; 13557ec681f3Smrg} 13567ec681f3Smrg 13577ec681f3Smrg/** 13587ec681f3Smrg * Get the union of TCS output and TES input slots. 13597ec681f3Smrg * 13607ec681f3Smrg * TCS and TES need to agree on a common URB entry layout. In particular, 13617ec681f3Smrg * the data for all patch vertices is stored in a single URB entry (unlike 13627ec681f3Smrg * GS which has one entry per input vertex). This means that per-vertex 13637ec681f3Smrg * array indexing needs a stride. 13647ec681f3Smrg * 13657ec681f3Smrg * SSO requires locations to match, but doesn't require the number of 13667ec681f3Smrg * outputs/inputs to match (in fact, the TCS often has extra outputs). 13677ec681f3Smrg * So, we need to take the extra step of unifying these on the fly. 13687ec681f3Smrg */ 13697ec681f3Smrgstatic void 13707ec681f3Smrgget_unified_tess_slots(const struct crocus_context *ice, 13717ec681f3Smrg uint64_t *per_vertex_slots, 13727ec681f3Smrg uint32_t *per_patch_slots) 13737ec681f3Smrg{ 13747ec681f3Smrg const struct shader_info *tcs = 13757ec681f3Smrg crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL); 13767ec681f3Smrg const struct shader_info *tes = 13777ec681f3Smrg crocus_get_shader_info(ice, MESA_SHADER_TESS_EVAL); 13787ec681f3Smrg 13797ec681f3Smrg *per_vertex_slots = tes->inputs_read; 13807ec681f3Smrg *per_patch_slots = tes->patch_inputs_read; 13817ec681f3Smrg 13827ec681f3Smrg if (tcs) { 13837ec681f3Smrg *per_vertex_slots |= tcs->outputs_written; 13847ec681f3Smrg *per_patch_slots |= tcs->patch_outputs_written; 13857ec681f3Smrg } 13867ec681f3Smrg} 13877ec681f3Smrg 13887ec681f3Smrg/** 13897ec681f3Smrg * Compile a tessellation control shader, and upload the assembly. 13907ec681f3Smrg */ 13917ec681f3Smrgstatic struct crocus_compiled_shader * 13927ec681f3Smrgcrocus_compile_tcs(struct crocus_context *ice, 13937ec681f3Smrg struct crocus_uncompiled_shader *ish, 13947ec681f3Smrg const struct brw_tcs_prog_key *key) 13957ec681f3Smrg{ 13967ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 13977ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 13987ec681f3Smrg const struct nir_shader_compiler_options *options = 13997ec681f3Smrg compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions; 14007ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 14017ec681f3Smrg struct brw_tcs_prog_data *tcs_prog_data = 14027ec681f3Smrg rzalloc(mem_ctx, struct brw_tcs_prog_data); 14037ec681f3Smrg struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; 14047ec681f3Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 14057ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 14067ec681f3Smrg enum brw_param_builtin *system_values = NULL; 14077ec681f3Smrg unsigned num_system_values = 0; 14087ec681f3Smrg unsigned num_cbufs = 0; 14097ec681f3Smrg 14107ec681f3Smrg nir_shader *nir; 14117ec681f3Smrg 14127ec681f3Smrg struct crocus_binding_table bt; 14137ec681f3Smrg 14147ec681f3Smrg if (ish) { 14157ec681f3Smrg nir = nir_shader_clone(mem_ctx, ish->nir); 14167ec681f3Smrg 14177ec681f3Smrg crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 14187ec681f3Smrg &num_system_values, &num_cbufs); 14197ec681f3Smrg 14207ec681f3Smrg crocus_lower_swizzles(nir, &key->base.tex); 14217ec681f3Smrg crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, 14227ec681f3Smrg num_system_values, num_cbufs, &key->base.tex); 14237ec681f3Smrg if (can_push_ubo(devinfo)) 14247ec681f3Smrg brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); 14257ec681f3Smrg } else { 14267ec681f3Smrg nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key); 14277ec681f3Smrg 14287ec681f3Smrg /* Reserve space for passing the default tess levels as constants. */ 14297ec681f3Smrg num_cbufs = 1; 14307ec681f3Smrg num_system_values = 8; 14317ec681f3Smrg system_values = 14327ec681f3Smrg rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values); 14337ec681f3Smrg prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values); 14347ec681f3Smrg prog_data->nr_params = num_system_values; 14357ec681f3Smrg 14367ec681f3Smrg if (key->tes_primitive_mode == GL_QUADS) { 14377ec681f3Smrg for (int i = 0; i < 4; i++) 14387ec681f3Smrg system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; 14397ec681f3Smrg 14407ec681f3Smrg system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; 14417ec681f3Smrg system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y; 14427ec681f3Smrg } else if (key->tes_primitive_mode == GL_TRIANGLES) { 14437ec681f3Smrg for (int i = 0; i < 3; i++) 14447ec681f3Smrg system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; 14457ec681f3Smrg 14467ec681f3Smrg system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; 14477ec681f3Smrg } else { 14487ec681f3Smrg assert(key->tes_primitive_mode == GL_ISOLINES); 14497ec681f3Smrg system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y; 14507ec681f3Smrg system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; 14517ec681f3Smrg } 14527ec681f3Smrg 14537ec681f3Smrg /* Manually setup the TCS binding table. */ 14547ec681f3Smrg memset(&bt, 0, sizeof(bt)); 14557ec681f3Smrg bt.sizes[CROCUS_SURFACE_GROUP_UBO] = 1; 14567ec681f3Smrg bt.used_mask[CROCUS_SURFACE_GROUP_UBO] = 1; 14577ec681f3Smrg bt.size_bytes = 4; 14587ec681f3Smrg 14597ec681f3Smrg prog_data->ubo_ranges[0].length = 1; 14607ec681f3Smrg } 14617ec681f3Smrg 14627ec681f3Smrg struct brw_tcs_prog_key key_clean = *key; 14637ec681f3Smrg crocus_sanitize_tex_key(&key_clean.base.tex); 14647ec681f3Smrg char *error_str = NULL; 14657ec681f3Smrg const unsigned *program = 14667ec681f3Smrg brw_compile_tcs(compiler, &ice->dbg, mem_ctx, &key_clean, tcs_prog_data, nir, 14677ec681f3Smrg -1, NULL, &error_str); 14687ec681f3Smrg if (program == NULL) { 14697ec681f3Smrg dbg_printf("Failed to compile control shader: %s\n", error_str); 14707ec681f3Smrg ralloc_free(mem_ctx); 14717ec681f3Smrg return false; 14727ec681f3Smrg } 14737ec681f3Smrg 14747ec681f3Smrg if (ish) { 14757ec681f3Smrg if (ish->compiled_once) { 14767ec681f3Smrg crocus_debug_recompile(ice, &nir->info, &key->base); 14777ec681f3Smrg } else { 14787ec681f3Smrg ish->compiled_once = true; 14797ec681f3Smrg } 14807ec681f3Smrg } 14817ec681f3Smrg 14827ec681f3Smrg struct crocus_compiled_shader *shader = 14837ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_TCS, sizeof(*key), key, program, 14847ec681f3Smrg prog_data->program_size, 14857ec681f3Smrg prog_data, sizeof(*tcs_prog_data), NULL, 14867ec681f3Smrg system_values, num_system_values, 14877ec681f3Smrg num_cbufs, &bt); 14887ec681f3Smrg 14897ec681f3Smrg if (ish) 14907ec681f3Smrg crocus_disk_cache_store(screen->disk_cache, ish, shader, 14917ec681f3Smrg ice->shaders.cache_bo_map, 14927ec681f3Smrg key, sizeof(*key)); 14937ec681f3Smrg 14947ec681f3Smrg ralloc_free(mem_ctx); 14957ec681f3Smrg return shader; 14967ec681f3Smrg} 14977ec681f3Smrg 14987ec681f3Smrg/** 14997ec681f3Smrg * Update the current tessellation control shader variant. 15007ec681f3Smrg * 15017ec681f3Smrg * Fill out the key, look in the cache, compile and bind if needed. 15027ec681f3Smrg */ 15037ec681f3Smrgstatic void 15047ec681f3Smrgcrocus_update_compiled_tcs(struct crocus_context *ice) 15057ec681f3Smrg{ 15067ec681f3Smrg struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL]; 15077ec681f3Smrg struct crocus_uncompiled_shader *tcs = 15087ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]; 15097ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 15107ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 15117ec681f3Smrg 15127ec681f3Smrg const struct shader_info *tes_info = 15137ec681f3Smrg crocus_get_shader_info(ice, MESA_SHADER_TESS_EVAL); 15147ec681f3Smrg struct brw_tcs_prog_key key = { 15157ec681f3Smrg KEY_INIT_NO_ID(), 15167ec681f3Smrg .base.program_string_id = tcs ? tcs->program_id : 0, 15177ec681f3Smrg .tes_primitive_mode = tes_info->tess.primitive_mode, 15187ec681f3Smrg .input_vertices = ice->state.vertices_per_patch, 15197ec681f3Smrg .quads_workaround = tes_info->tess.primitive_mode == GL_QUADS && 15207ec681f3Smrg tes_info->tess.spacing == TESS_SPACING_EQUAL, 15217ec681f3Smrg }; 15227ec681f3Smrg 15237ec681f3Smrg if (tcs && tcs->nos & (1ull << CROCUS_NOS_TEXTURES)) 15247ec681f3Smrg crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_TESS_CTRL, tcs, 15257ec681f3Smrg tcs->nir->info.uses_texture_gather, &key.base.tex); 15267ec681f3Smrg get_unified_tess_slots(ice, &key.outputs_written, 15277ec681f3Smrg &key.patch_outputs_written); 15287ec681f3Smrg screen->vtbl.populate_tcs_key(ice, &key); 15297ec681f3Smrg 15307ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_TCS]; 15317ec681f3Smrg struct crocus_compiled_shader *shader = 15327ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_TCS, sizeof(key), &key); 15337ec681f3Smrg 15347ec681f3Smrg if (tcs && !shader) 15357ec681f3Smrg shader = crocus_disk_cache_retrieve(ice, tcs, &key, sizeof(key)); 15367ec681f3Smrg 15377ec681f3Smrg if (!shader) 15387ec681f3Smrg shader = crocus_compile_tcs(ice, tcs, &key); 15397ec681f3Smrg 15407ec681f3Smrg if (old != shader) { 15417ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_TCS] = shader; 15427ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_TCS | 15437ec681f3Smrg CROCUS_STAGE_DIRTY_BINDINGS_TCS | 15447ec681f3Smrg CROCUS_STAGE_DIRTY_CONSTANTS_TCS; 15457ec681f3Smrg shs->sysvals_need_upload = true; 15467ec681f3Smrg } 15477ec681f3Smrg} 15487ec681f3Smrg 15497ec681f3Smrg/** 15507ec681f3Smrg * Compile a tessellation evaluation shader, and upload the assembly. 15517ec681f3Smrg */ 15527ec681f3Smrgstatic struct crocus_compiled_shader * 15537ec681f3Smrgcrocus_compile_tes(struct crocus_context *ice, 15547ec681f3Smrg struct crocus_uncompiled_shader *ish, 15557ec681f3Smrg const struct brw_tes_prog_key *key) 15567ec681f3Smrg{ 15577ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 15587ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 15597ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 15607ec681f3Smrg struct brw_tes_prog_data *tes_prog_data = 15617ec681f3Smrg rzalloc(mem_ctx, struct brw_tes_prog_data); 15627ec681f3Smrg struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base; 15637ec681f3Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 15647ec681f3Smrg enum brw_param_builtin *system_values; 15657ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 15667ec681f3Smrg unsigned num_system_values; 15677ec681f3Smrg unsigned num_cbufs; 15687ec681f3Smrg 15697ec681f3Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 15707ec681f3Smrg 15717ec681f3Smrg if (key->nr_userclip_plane_consts) { 15727ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 15737ec681f3Smrg nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true, 15747ec681f3Smrg false, NULL); 15757ec681f3Smrg nir_lower_io_to_temporaries(nir, impl, true, false); 15767ec681f3Smrg nir_lower_global_vars_to_local(nir); 15777ec681f3Smrg nir_lower_vars_to_ssa(nir); 15787ec681f3Smrg nir_shader_gather_info(nir, impl); 15797ec681f3Smrg } 15807ec681f3Smrg 15817ec681f3Smrg crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 15827ec681f3Smrg &num_system_values, &num_cbufs); 15837ec681f3Smrg crocus_lower_swizzles(nir, &key->base.tex); 15847ec681f3Smrg struct crocus_binding_table bt; 15857ec681f3Smrg crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, 15867ec681f3Smrg num_system_values, num_cbufs, &key->base.tex); 15877ec681f3Smrg 15887ec681f3Smrg if (can_push_ubo(devinfo)) 15897ec681f3Smrg brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); 15907ec681f3Smrg 15917ec681f3Smrg struct brw_vue_map input_vue_map; 15927ec681f3Smrg brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, 15937ec681f3Smrg key->patch_inputs_read); 15947ec681f3Smrg 15957ec681f3Smrg struct brw_tes_prog_key key_clean = *key; 15967ec681f3Smrg crocus_sanitize_tex_key(&key_clean.base.tex); 15977ec681f3Smrg char *error_str = NULL; 15987ec681f3Smrg const unsigned *program = 15997ec681f3Smrg brw_compile_tes(compiler, &ice->dbg, mem_ctx, &key_clean, &input_vue_map, 16007ec681f3Smrg tes_prog_data, nir, -1, NULL, &error_str); 16017ec681f3Smrg if (program == NULL) { 16027ec681f3Smrg dbg_printf("Failed to compile evaluation shader: %s\n", error_str); 16037ec681f3Smrg ralloc_free(mem_ctx); 16047ec681f3Smrg return false; 16057ec681f3Smrg } 16067ec681f3Smrg 16077ec681f3Smrg if (ish->compiled_once) { 16087ec681f3Smrg crocus_debug_recompile(ice, &nir->info, &key->base); 16097ec681f3Smrg } else { 16107ec681f3Smrg ish->compiled_once = true; 16117ec681f3Smrg } 16127ec681f3Smrg 16137ec681f3Smrg uint32_t *so_decls = NULL; 16147ec681f3Smrg if (devinfo->ver > 6) 16157ec681f3Smrg so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output, 16167ec681f3Smrg &vue_prog_data->vue_map); 16177ec681f3Smrg 16187ec681f3Smrg struct crocus_compiled_shader *shader = 16197ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_TES, sizeof(*key), key, program, 16207ec681f3Smrg prog_data->program_size, 16217ec681f3Smrg prog_data, sizeof(*tes_prog_data), so_decls, 16227ec681f3Smrg system_values, num_system_values, 16237ec681f3Smrg num_cbufs, &bt); 16247ec681f3Smrg 16257ec681f3Smrg crocus_disk_cache_store(screen->disk_cache, ish, shader, 16267ec681f3Smrg ice->shaders.cache_bo_map, 16277ec681f3Smrg key, sizeof(*key)); 16287ec681f3Smrg 16297ec681f3Smrg ralloc_free(mem_ctx); 16307ec681f3Smrg return shader; 16317ec681f3Smrg} 16327ec681f3Smrg 16337ec681f3Smrg/** 16347ec681f3Smrg * Update the current tessellation evaluation shader variant. 16357ec681f3Smrg * 16367ec681f3Smrg * Fill out the key, look in the cache, compile and bind if needed. 16377ec681f3Smrg */ 16387ec681f3Smrgstatic void 16397ec681f3Smrgcrocus_update_compiled_tes(struct crocus_context *ice) 16407ec681f3Smrg{ 16417ec681f3Smrg struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL]; 16427ec681f3Smrg struct crocus_uncompiled_shader *ish = 16437ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; 16447ec681f3Smrg struct brw_tes_prog_key key = { KEY_INIT() }; 16457ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 16467ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 16477ec681f3Smrg 16487ec681f3Smrg if (ish->nos & (1ull << CROCUS_NOS_TEXTURES)) 16497ec681f3Smrg crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_TESS_EVAL, ish, 16507ec681f3Smrg ish->nir->info.uses_texture_gather, &key.base.tex); 16517ec681f3Smrg get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read); 16527ec681f3Smrg screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key); 16537ec681f3Smrg 16547ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_TES]; 16557ec681f3Smrg struct crocus_compiled_shader *shader = 16567ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_TES, sizeof(key), &key); 16577ec681f3Smrg 16587ec681f3Smrg if (!shader) 16597ec681f3Smrg shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)); 16607ec681f3Smrg 16617ec681f3Smrg if (!shader) 16627ec681f3Smrg shader = crocus_compile_tes(ice, ish, &key); 16637ec681f3Smrg 16647ec681f3Smrg if (old != shader) { 16657ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_TES] = shader; 16667ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_TES | 16677ec681f3Smrg CROCUS_STAGE_DIRTY_BINDINGS_TES | 16687ec681f3Smrg CROCUS_STAGE_DIRTY_CONSTANTS_TES; 16697ec681f3Smrg shs->sysvals_need_upload = true; 16707ec681f3Smrg } 16717ec681f3Smrg 16727ec681f3Smrg /* TODO: Could compare and avoid flagging this. */ 16737ec681f3Smrg const struct shader_info *tes_info = &ish->nir->info; 16747ec681f3Smrg if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) { 16757ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TES; 16767ec681f3Smrg ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true; 16777ec681f3Smrg } 16787ec681f3Smrg} 16797ec681f3Smrg 16807ec681f3Smrg/** 16817ec681f3Smrg * Compile a geometry shader, and upload the assembly. 16827ec681f3Smrg */ 16837ec681f3Smrgstatic struct crocus_compiled_shader * 16847ec681f3Smrgcrocus_compile_gs(struct crocus_context *ice, 16857ec681f3Smrg struct crocus_uncompiled_shader *ish, 16867ec681f3Smrg const struct brw_gs_prog_key *key) 16877ec681f3Smrg{ 16887ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 16897ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 16907ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 16917ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 16927ec681f3Smrg struct brw_gs_prog_data *gs_prog_data = 16937ec681f3Smrg rzalloc(mem_ctx, struct brw_gs_prog_data); 16947ec681f3Smrg struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base; 16957ec681f3Smrg struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 16967ec681f3Smrg enum brw_param_builtin *system_values; 16977ec681f3Smrg unsigned num_system_values; 16987ec681f3Smrg unsigned num_cbufs; 16997ec681f3Smrg 17007ec681f3Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 17017ec681f3Smrg 17027ec681f3Smrg if (key->nr_userclip_plane_consts) { 17037ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 17047ec681f3Smrg nir_lower_clip_gs(nir, (1 << key->nr_userclip_plane_consts) - 1, false, 17057ec681f3Smrg NULL); 17067ec681f3Smrg nir_lower_io_to_temporaries(nir, impl, true, false); 17077ec681f3Smrg nir_lower_global_vars_to_local(nir); 17087ec681f3Smrg nir_lower_vars_to_ssa(nir); 17097ec681f3Smrg nir_shader_gather_info(nir, impl); 17107ec681f3Smrg } 17117ec681f3Smrg 17127ec681f3Smrg crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 17137ec681f3Smrg &num_system_values, &num_cbufs); 17147ec681f3Smrg crocus_lower_swizzles(nir, &key->base.tex); 17157ec681f3Smrg struct crocus_binding_table bt; 17167ec681f3Smrg crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, 17177ec681f3Smrg num_system_values, num_cbufs, &key->base.tex); 17187ec681f3Smrg 17197ec681f3Smrg if (can_push_ubo(devinfo)) 17207ec681f3Smrg brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); 17217ec681f3Smrg 17227ec681f3Smrg brw_compute_vue_map(devinfo, 17237ec681f3Smrg &vue_prog_data->vue_map, nir->info.outputs_written, 17247ec681f3Smrg nir->info.separate_shader, /* pos slots */ 1); 17257ec681f3Smrg 17267ec681f3Smrg if (devinfo->ver == 6) 17277ec681f3Smrg gfx6_gs_xfb_setup(&ish->stream_output, gs_prog_data); 17287ec681f3Smrg struct brw_gs_prog_key key_clean = *key; 17297ec681f3Smrg crocus_sanitize_tex_key(&key_clean.base.tex); 17307ec681f3Smrg 17317ec681f3Smrg char *error_str = NULL; 17327ec681f3Smrg const unsigned *program = 17337ec681f3Smrg brw_compile_gs(compiler, &ice->dbg, mem_ctx, &key_clean, gs_prog_data, nir, 17347ec681f3Smrg -1, NULL, &error_str); 17357ec681f3Smrg if (program == NULL) { 17367ec681f3Smrg dbg_printf("Failed to compile geometry shader: %s\n", error_str); 17377ec681f3Smrg ralloc_free(mem_ctx); 17387ec681f3Smrg return false; 17397ec681f3Smrg } 17407ec681f3Smrg 17417ec681f3Smrg if (ish->compiled_once) { 17427ec681f3Smrg crocus_debug_recompile(ice, &nir->info, &key->base); 17437ec681f3Smrg } else { 17447ec681f3Smrg ish->compiled_once = true; 17457ec681f3Smrg } 17467ec681f3Smrg 17477ec681f3Smrg uint32_t *so_decls = NULL; 17487ec681f3Smrg if (devinfo->ver > 6) 17497ec681f3Smrg so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output, 17507ec681f3Smrg &vue_prog_data->vue_map); 17517ec681f3Smrg 17527ec681f3Smrg struct crocus_compiled_shader *shader = 17537ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_GS, sizeof(*key), key, program, 17547ec681f3Smrg prog_data->program_size, 17557ec681f3Smrg prog_data, sizeof(*gs_prog_data), so_decls, 17567ec681f3Smrg system_values, num_system_values, 17577ec681f3Smrg num_cbufs, &bt); 17587ec681f3Smrg 17597ec681f3Smrg crocus_disk_cache_store(screen->disk_cache, ish, shader, 17607ec681f3Smrg ice->shaders.cache_bo_map, 17617ec681f3Smrg key, sizeof(*key)); 17627ec681f3Smrg 17637ec681f3Smrg ralloc_free(mem_ctx); 17647ec681f3Smrg return shader; 17657ec681f3Smrg} 17667ec681f3Smrg 17677ec681f3Smrg/** 17687ec681f3Smrg * Update the current geometry shader variant. 17697ec681f3Smrg * 17707ec681f3Smrg * Fill out the key, look in the cache, compile and bind if needed. 17717ec681f3Smrg */ 17727ec681f3Smrgstatic void 17737ec681f3Smrgcrocus_update_compiled_gs(struct crocus_context *ice) 17747ec681f3Smrg{ 17757ec681f3Smrg struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY]; 17767ec681f3Smrg struct crocus_uncompiled_shader *ish = 17777ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]; 17787ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_GS]; 17797ec681f3Smrg struct crocus_compiled_shader *shader = NULL; 17807ec681f3Smrg 17817ec681f3Smrg if (ish) { 17827ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 17837ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 17847ec681f3Smrg struct brw_gs_prog_key key = { KEY_INIT() }; 17857ec681f3Smrg 17867ec681f3Smrg if (ish->nos & (1ull << CROCUS_NOS_TEXTURES)) 17877ec681f3Smrg crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_GEOMETRY, ish, 17887ec681f3Smrg ish->nir->info.uses_texture_gather, &key.base.tex); 17897ec681f3Smrg screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key); 17907ec681f3Smrg 17917ec681f3Smrg shader = 17927ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_GS, sizeof(key), &key); 17937ec681f3Smrg 17947ec681f3Smrg if (!shader) 17957ec681f3Smrg shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)); 17967ec681f3Smrg 17977ec681f3Smrg if (!shader) 17987ec681f3Smrg shader = crocus_compile_gs(ice, ish, &key); 17997ec681f3Smrg } 18007ec681f3Smrg 18017ec681f3Smrg if (old != shader) { 18027ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_GS] = shader; 18037ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS | 18047ec681f3Smrg CROCUS_STAGE_DIRTY_BINDINGS_GS | 18057ec681f3Smrg CROCUS_STAGE_DIRTY_CONSTANTS_GS; 18067ec681f3Smrg shs->sysvals_need_upload = true; 18077ec681f3Smrg } 18087ec681f3Smrg} 18097ec681f3Smrg 18107ec681f3Smrg/** 18117ec681f3Smrg * Compile a fragment (pixel) shader, and upload the assembly. 18127ec681f3Smrg */ 18137ec681f3Smrgstatic struct crocus_compiled_shader * 18147ec681f3Smrgcrocus_compile_fs(struct crocus_context *ice, 18157ec681f3Smrg struct crocus_uncompiled_shader *ish, 18167ec681f3Smrg const struct brw_wm_prog_key *key, 18177ec681f3Smrg struct brw_vue_map *vue_map) 18187ec681f3Smrg{ 18197ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 18207ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 18217ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 18227ec681f3Smrg struct brw_wm_prog_data *fs_prog_data = 18237ec681f3Smrg rzalloc(mem_ctx, struct brw_wm_prog_data); 18247ec681f3Smrg struct brw_stage_prog_data *prog_data = &fs_prog_data->base; 18257ec681f3Smrg enum brw_param_builtin *system_values; 18267ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 18277ec681f3Smrg unsigned num_system_values; 18287ec681f3Smrg unsigned num_cbufs; 18297ec681f3Smrg 18307ec681f3Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 18317ec681f3Smrg 18327ec681f3Smrg prog_data->use_alt_mode = nir->info.is_arb_asm; 18337ec681f3Smrg 18347ec681f3Smrg crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 18357ec681f3Smrg &num_system_values, &num_cbufs); 18367ec681f3Smrg 18377ec681f3Smrg /* Lower output variables to load_output intrinsics before setting up 18387ec681f3Smrg * binding tables, so crocus_setup_binding_table can map any load_output 18397ec681f3Smrg * intrinsics to CROCUS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for 18407ec681f3Smrg * non-coherent framebuffer fetches. 18417ec681f3Smrg */ 18427ec681f3Smrg brw_nir_lower_fs_outputs(nir); 18437ec681f3Smrg 18447ec681f3Smrg /* lower swizzles before binding table */ 18457ec681f3Smrg crocus_lower_swizzles(nir, &key->base.tex); 18467ec681f3Smrg int null_rts = 1; 18477ec681f3Smrg 18487ec681f3Smrg struct crocus_binding_table bt; 18497ec681f3Smrg crocus_setup_binding_table(devinfo, nir, &bt, 18507ec681f3Smrg MAX2(key->nr_color_regions, null_rts), 18517ec681f3Smrg num_system_values, num_cbufs, 18527ec681f3Smrg &key->base.tex); 18537ec681f3Smrg 18547ec681f3Smrg if (can_push_ubo(devinfo)) 18557ec681f3Smrg brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); 18567ec681f3Smrg 18577ec681f3Smrg struct brw_wm_prog_key key_clean = *key; 18587ec681f3Smrg crocus_sanitize_tex_key(&key_clean.base.tex); 18597ec681f3Smrg 18607ec681f3Smrg struct brw_compile_fs_params params = { 18617ec681f3Smrg .nir = nir, 18627ec681f3Smrg .key = &key_clean, 18637ec681f3Smrg .prog_data = fs_prog_data, 18647ec681f3Smrg 18657ec681f3Smrg .allow_spilling = true, 18667ec681f3Smrg .vue_map = vue_map, 18677ec681f3Smrg 18687ec681f3Smrg .log_data = &ice->dbg, 18697ec681f3Smrg }; 18707ec681f3Smrg const unsigned *program = 18717ec681f3Smrg brw_compile_fs(compiler, mem_ctx, ¶ms); 18727ec681f3Smrg if (program == NULL) { 18737ec681f3Smrg dbg_printf("Failed to compile fragment shader: %s\n", params.error_str); 18747ec681f3Smrg ralloc_free(mem_ctx); 18757ec681f3Smrg return false; 18767ec681f3Smrg } 18777ec681f3Smrg 18787ec681f3Smrg if (ish->compiled_once) { 18797ec681f3Smrg crocus_debug_recompile(ice, &nir->info, &key->base); 18807ec681f3Smrg } else { 18817ec681f3Smrg ish->compiled_once = true; 18827ec681f3Smrg } 18837ec681f3Smrg 18847ec681f3Smrg struct crocus_compiled_shader *shader = 18857ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_FS, sizeof(*key), key, program, 18867ec681f3Smrg prog_data->program_size, 18877ec681f3Smrg prog_data, sizeof(*fs_prog_data), NULL, 18887ec681f3Smrg system_values, num_system_values, 18897ec681f3Smrg num_cbufs, &bt); 18907ec681f3Smrg 18917ec681f3Smrg crocus_disk_cache_store(screen->disk_cache, ish, shader, 18927ec681f3Smrg ice->shaders.cache_bo_map, 18937ec681f3Smrg key, sizeof(*key)); 18947ec681f3Smrg 18957ec681f3Smrg ralloc_free(mem_ctx); 18967ec681f3Smrg return shader; 18977ec681f3Smrg} 18987ec681f3Smrg 18997ec681f3Smrg/** 19007ec681f3Smrg * Update the current fragment shader variant. 19017ec681f3Smrg * 19027ec681f3Smrg * Fill out the key, look in the cache, compile and bind if needed. 19037ec681f3Smrg */ 19047ec681f3Smrgstatic void 19057ec681f3Smrgcrocus_update_compiled_fs(struct crocus_context *ice) 19067ec681f3Smrg{ 19077ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 19087ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 19097ec681f3Smrg struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT]; 19107ec681f3Smrg struct crocus_uncompiled_shader *ish = 19117ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; 19127ec681f3Smrg struct brw_wm_prog_key key = { KEY_INIT() }; 19137ec681f3Smrg 19147ec681f3Smrg if (ish->nos & (1ull << CROCUS_NOS_TEXTURES)) 19157ec681f3Smrg crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_FRAGMENT, ish, 19167ec681f3Smrg ish->nir->info.uses_texture_gather, &key.base.tex); 19177ec681f3Smrg screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key); 19187ec681f3Smrg 19197ec681f3Smrg if (ish->nos & (1ull << CROCUS_NOS_LAST_VUE_MAP)) 19207ec681f3Smrg key.input_slots_valid = ice->shaders.last_vue_map->slots_valid; 19217ec681f3Smrg 19227ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_FS]; 19237ec681f3Smrg struct crocus_compiled_shader *shader = 19247ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_FS, sizeof(key), &key); 19257ec681f3Smrg 19267ec681f3Smrg if (!shader) 19277ec681f3Smrg shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)); 19287ec681f3Smrg 19297ec681f3Smrg if (!shader) 19307ec681f3Smrg shader = crocus_compile_fs(ice, ish, &key, ice->shaders.last_vue_map); 19317ec681f3Smrg 19327ec681f3Smrg if (old != shader) { 19337ec681f3Smrg // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE 19347ec681f3Smrg // toggles. might be able to avoid flagging SBE too. 19357ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_FS] = shader; 19367ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_WM; 19377ec681f3Smrg /* gen4 clip/sf rely on fs prog_data */ 19387ec681f3Smrg if (devinfo->ver < 6) 19397ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG; 19407ec681f3Smrg else 19417ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_GEN6_BLEND_STATE; 19427ec681f3Smrg if (devinfo->ver == 6) 19437ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_RASTER; 19447ec681f3Smrg if (devinfo->ver >= 7) 19457ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE; 19467ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS | 19477ec681f3Smrg CROCUS_STAGE_DIRTY_BINDINGS_FS | 19487ec681f3Smrg CROCUS_STAGE_DIRTY_CONSTANTS_FS; 19497ec681f3Smrg shs->sysvals_need_upload = true; 19507ec681f3Smrg } 19517ec681f3Smrg} 19527ec681f3Smrg 19537ec681f3Smrg/** 19547ec681f3Smrg * Update the last enabled stage's VUE map. 19557ec681f3Smrg * 19567ec681f3Smrg * When the shader feeding the rasterizer's output interface changes, we 19577ec681f3Smrg * need to re-emit various packets. 19587ec681f3Smrg */ 19597ec681f3Smrgstatic void 19607ec681f3Smrgupdate_last_vue_map(struct crocus_context *ice, 19617ec681f3Smrg struct brw_stage_prog_data *prog_data) 19627ec681f3Smrg{ 19637ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 19647ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 19657ec681f3Smrg struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; 19667ec681f3Smrg struct brw_vue_map *vue_map = &vue_prog_data->vue_map; 19677ec681f3Smrg struct brw_vue_map *old_map = ice->shaders.last_vue_map; 19687ec681f3Smrg const uint64_t changed_slots = 19697ec681f3Smrg (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid; 19707ec681f3Smrg 19717ec681f3Smrg if (changed_slots & VARYING_BIT_VIEWPORT) { 19727ec681f3Smrg ice->state.num_viewports = 19737ec681f3Smrg (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? CROCUS_MAX_VIEWPORTS : 1; 19747ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT | 19757ec681f3Smrg CROCUS_DIRTY_CC_VIEWPORT; 19767ec681f3Smrg if (devinfo->ver < 6) 19777ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG; 19787ec681f3Smrg 19797ec681f3Smrg if (devinfo->ver <= 6) 19807ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG; 19817ec681f3Smrg 19827ec681f3Smrg if (devinfo->ver >= 6) 19837ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_CLIP | 19847ec681f3Smrg CROCUS_DIRTY_GEN6_SCISSOR_RECT;; 19857ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_FS | 19867ec681f3Smrg ice->state.stage_dirty_for_nos[CROCUS_NOS_LAST_VUE_MAP]; 19877ec681f3Smrg } 19887ec681f3Smrg 19897ec681f3Smrg if (changed_slots || (old_map && old_map->separate != vue_map->separate)) { 19907ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE; 19917ec681f3Smrg if (devinfo->ver < 6) 19927ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG; 19937ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_FS; 19947ec681f3Smrg } 19957ec681f3Smrg 19967ec681f3Smrg ice->shaders.last_vue_map = &vue_prog_data->vue_map; 19977ec681f3Smrg} 19987ec681f3Smrg 19997ec681f3Smrgstatic void 20007ec681f3Smrgcrocus_update_pull_constant_descriptors(struct crocus_context *ice, 20017ec681f3Smrg gl_shader_stage stage) 20027ec681f3Smrg{ 20037ec681f3Smrg struct crocus_compiled_shader *shader = ice->shaders.prog[stage]; 20047ec681f3Smrg 20057ec681f3Smrg if (!shader || !shader->prog_data->has_ubo_pull) 20067ec681f3Smrg return; 20077ec681f3Smrg 20087ec681f3Smrg struct crocus_shader_state *shs = &ice->state.shaders[stage]; 20097ec681f3Smrg bool any_new_descriptors = 20107ec681f3Smrg shader->num_system_values > 0 && shs->sysvals_need_upload; 20117ec681f3Smrg 20127ec681f3Smrg unsigned bound_cbufs = shs->bound_cbufs; 20137ec681f3Smrg 20147ec681f3Smrg while (bound_cbufs) { 20157ec681f3Smrg const int i = u_bit_scan(&bound_cbufs); 20167ec681f3Smrg struct pipe_constant_buffer *cbuf = &shs->constbufs[i]; 20177ec681f3Smrg if (cbuf->buffer) { 20187ec681f3Smrg any_new_descriptors = true; 20197ec681f3Smrg } 20207ec681f3Smrg } 20217ec681f3Smrg 20227ec681f3Smrg if (any_new_descriptors) 20237ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage; 20247ec681f3Smrg} 20257ec681f3Smrg 20267ec681f3Smrg/** 20277ec681f3Smrg * Get the prog_data for a given stage, or NULL if the stage is disabled. 20287ec681f3Smrg */ 20297ec681f3Smrgstatic struct brw_vue_prog_data * 20307ec681f3Smrgget_vue_prog_data(struct crocus_context *ice, gl_shader_stage stage) 20317ec681f3Smrg{ 20327ec681f3Smrg if (!ice->shaders.prog[stage]) 20337ec681f3Smrg return NULL; 20347ec681f3Smrg 20357ec681f3Smrg return (void *) ice->shaders.prog[stage]->prog_data; 20367ec681f3Smrg} 20377ec681f3Smrg 20387ec681f3Smrgstatic struct crocus_compiled_shader * 20397ec681f3Smrgcrocus_compile_clip(struct crocus_context *ice, struct brw_clip_prog_key *key) 20407ec681f3Smrg{ 20417ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 20427ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 20437ec681f3Smrg void *mem_ctx; 20447ec681f3Smrg unsigned program_size; 20457ec681f3Smrg mem_ctx = ralloc_context(NULL); 20467ec681f3Smrg 20477ec681f3Smrg struct brw_clip_prog_data *clip_prog_data = 20487ec681f3Smrg rzalloc(mem_ctx, struct brw_clip_prog_data); 20497ec681f3Smrg 20507ec681f3Smrg const unsigned *program = brw_compile_clip(compiler, mem_ctx, key, clip_prog_data, 20517ec681f3Smrg ice->shaders.last_vue_map, &program_size); 20527ec681f3Smrg 20537ec681f3Smrg if (program == NULL) { 20547ec681f3Smrg dbg_printf("failed to compile clip shader\n"); 20557ec681f3Smrg ralloc_free(mem_ctx); 20567ec681f3Smrg return false; 20577ec681f3Smrg } 20587ec681f3Smrg struct crocus_binding_table bt; 20597ec681f3Smrg memset(&bt, 0, sizeof(bt)); 20607ec681f3Smrg 20617ec681f3Smrg struct crocus_compiled_shader *shader = 20627ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_CLIP, sizeof(*key), key, program, 20637ec681f3Smrg program_size, 20647ec681f3Smrg (struct brw_stage_prog_data *)clip_prog_data, sizeof(*clip_prog_data), 20657ec681f3Smrg NULL, NULL, 0, 0, &bt); 20667ec681f3Smrg ralloc_free(mem_ctx); 20677ec681f3Smrg return shader; 20687ec681f3Smrg} 20697ec681f3Smrgstatic void 20707ec681f3Smrgcrocus_update_compiled_clip(struct crocus_context *ice) 20717ec681f3Smrg{ 20727ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 20737ec681f3Smrg struct brw_clip_prog_key key; 20747ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.clip_prog; 20757ec681f3Smrg memset(&key, 0, sizeof(key)); 20767ec681f3Smrg 20777ec681f3Smrg const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data); 20787ec681f3Smrg if (wm_prog_data) { 20797ec681f3Smrg key.contains_flat_varying = wm_prog_data->contains_flat_varying; 20807ec681f3Smrg key.contains_noperspective_varying = 20817ec681f3Smrg wm_prog_data->contains_noperspective_varying; 20827ec681f3Smrg memcpy(key.interp_mode, wm_prog_data->interp_mode, sizeof(key.interp_mode)); 20837ec681f3Smrg } 20847ec681f3Smrg 20857ec681f3Smrg key.primitive = ice->state.reduced_prim_mode; 20867ec681f3Smrg key.attrs = ice->shaders.last_vue_map->slots_valid; 20877ec681f3Smrg 20887ec681f3Smrg struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice); 20897ec681f3Smrg key.pv_first = rs_state->flatshade_first; 20907ec681f3Smrg 20917ec681f3Smrg if (rs_state->clip_plane_enable) 20927ec681f3Smrg key.nr_userclip = util_logbase2(rs_state->clip_plane_enable) + 1; 20937ec681f3Smrg 20947ec681f3Smrg if (screen->devinfo.ver == 5) 20957ec681f3Smrg key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP; 20967ec681f3Smrg else 20977ec681f3Smrg key.clip_mode = BRW_CLIP_MODE_NORMAL; 20987ec681f3Smrg 20997ec681f3Smrg if (key.primitive == PIPE_PRIM_TRIANGLES) { 21007ec681f3Smrg if (rs_state->cull_face == PIPE_FACE_FRONT_AND_BACK) 21017ec681f3Smrg key.clip_mode = BRW_CLIP_MODE_REJECT_ALL; 21027ec681f3Smrg else { 21037ec681f3Smrg uint32_t fill_front = BRW_CLIP_FILL_MODE_CULL; 21047ec681f3Smrg uint32_t fill_back = BRW_CLIP_FILL_MODE_CULL; 21057ec681f3Smrg uint32_t offset_front = 0; 21067ec681f3Smrg uint32_t offset_back = 0; 21077ec681f3Smrg 21087ec681f3Smrg if (!(rs_state->cull_face & PIPE_FACE_FRONT)) { 21097ec681f3Smrg switch (rs_state->fill_front) { 21107ec681f3Smrg case PIPE_POLYGON_MODE_FILL: 21117ec681f3Smrg fill_front = BRW_CLIP_FILL_MODE_FILL; 21127ec681f3Smrg offset_front = 0; 21137ec681f3Smrg break; 21147ec681f3Smrg case PIPE_POLYGON_MODE_LINE: 21157ec681f3Smrg fill_front = BRW_CLIP_FILL_MODE_LINE; 21167ec681f3Smrg offset_front = rs_state->offset_line; 21177ec681f3Smrg break; 21187ec681f3Smrg case PIPE_POLYGON_MODE_POINT: 21197ec681f3Smrg fill_front = BRW_CLIP_FILL_MODE_POINT; 21207ec681f3Smrg offset_front = rs_state->offset_point; 21217ec681f3Smrg break; 21227ec681f3Smrg } 21237ec681f3Smrg } 21247ec681f3Smrg 21257ec681f3Smrg if (!(rs_state->cull_face & PIPE_FACE_BACK)) { 21267ec681f3Smrg switch (rs_state->fill_back) { 21277ec681f3Smrg case PIPE_POLYGON_MODE_FILL: 21287ec681f3Smrg fill_back = BRW_CLIP_FILL_MODE_FILL; 21297ec681f3Smrg offset_back = 0; 21307ec681f3Smrg break; 21317ec681f3Smrg case PIPE_POLYGON_MODE_LINE: 21327ec681f3Smrg fill_back = BRW_CLIP_FILL_MODE_LINE; 21337ec681f3Smrg offset_back = rs_state->offset_line; 21347ec681f3Smrg break; 21357ec681f3Smrg case PIPE_POLYGON_MODE_POINT: 21367ec681f3Smrg fill_back = BRW_CLIP_FILL_MODE_POINT; 21377ec681f3Smrg offset_back = rs_state->offset_point; 21387ec681f3Smrg break; 21397ec681f3Smrg } 21407ec681f3Smrg } 21417ec681f3Smrg 21427ec681f3Smrg if (rs_state->fill_back != PIPE_POLYGON_MODE_FILL || 21437ec681f3Smrg rs_state->fill_front != PIPE_POLYGON_MODE_FILL) { 21447ec681f3Smrg key.do_unfilled = 1; 21457ec681f3Smrg 21467ec681f3Smrg /* Most cases the fixed function units will handle. Cases where 21477ec681f3Smrg * one or more polygon faces are unfilled will require help: 21487ec681f3Smrg */ 21497ec681f3Smrg key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED; 21507ec681f3Smrg 21517ec681f3Smrg if (offset_back || offset_front) { 21527ec681f3Smrg double mrd = 0.0; 21537ec681f3Smrg if (ice->state.framebuffer.zsbuf) 21547ec681f3Smrg mrd = util_get_depth_format_mrd(util_format_description(ice->state.framebuffer.zsbuf->format)); 21557ec681f3Smrg key.offset_units = rs_state->offset_units * mrd * 2; 21567ec681f3Smrg key.offset_factor = rs_state->offset_scale * mrd; 21577ec681f3Smrg key.offset_clamp = rs_state->offset_clamp * mrd; 21587ec681f3Smrg } 21597ec681f3Smrg 21607ec681f3Smrg if (!(rs_state->front_ccw ^ rs_state->bottom_edge_rule)) { 21617ec681f3Smrg key.fill_ccw = fill_front; 21627ec681f3Smrg key.fill_cw = fill_back; 21637ec681f3Smrg key.offset_ccw = offset_front; 21647ec681f3Smrg key.offset_cw = offset_back; 21657ec681f3Smrg if (rs_state->light_twoside && 21667ec681f3Smrg key.fill_cw != BRW_CLIP_FILL_MODE_CULL) 21677ec681f3Smrg key.copy_bfc_cw = 1; 21687ec681f3Smrg } else { 21697ec681f3Smrg key.fill_cw = fill_front; 21707ec681f3Smrg key.fill_ccw = fill_back; 21717ec681f3Smrg key.offset_cw = offset_front; 21727ec681f3Smrg key.offset_ccw = offset_back; 21737ec681f3Smrg if (rs_state->light_twoside && 21747ec681f3Smrg key.fill_ccw != BRW_CLIP_FILL_MODE_CULL) 21757ec681f3Smrg key.copy_bfc_ccw = 1; 21767ec681f3Smrg } 21777ec681f3Smrg } 21787ec681f3Smrg } 21797ec681f3Smrg } 21807ec681f3Smrg struct crocus_compiled_shader *shader = 21817ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_CLIP, sizeof(key), &key); 21827ec681f3Smrg 21837ec681f3Smrg if (!shader) 21847ec681f3Smrg shader = crocus_compile_clip(ice, &key); 21857ec681f3Smrg 21867ec681f3Smrg if (old != shader) { 21877ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_CLIP; 21887ec681f3Smrg ice->shaders.clip_prog = shader; 21897ec681f3Smrg } 21907ec681f3Smrg} 21917ec681f3Smrg 21927ec681f3Smrgstatic struct crocus_compiled_shader * 21937ec681f3Smrgcrocus_compile_sf(struct crocus_context *ice, struct brw_sf_prog_key *key) 21947ec681f3Smrg{ 21957ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 21967ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 21977ec681f3Smrg void *mem_ctx; 21987ec681f3Smrg unsigned program_size; 21997ec681f3Smrg mem_ctx = ralloc_context(NULL); 22007ec681f3Smrg 22017ec681f3Smrg struct brw_sf_prog_data *sf_prog_data = 22027ec681f3Smrg rzalloc(mem_ctx, struct brw_sf_prog_data); 22037ec681f3Smrg 22047ec681f3Smrg const unsigned *program = brw_compile_sf(compiler, mem_ctx, key, sf_prog_data, 22057ec681f3Smrg ice->shaders.last_vue_map, &program_size); 22067ec681f3Smrg 22077ec681f3Smrg if (program == NULL) { 22087ec681f3Smrg dbg_printf("failed to compile sf shader\n"); 22097ec681f3Smrg ralloc_free(mem_ctx); 22107ec681f3Smrg return false; 22117ec681f3Smrg } 22127ec681f3Smrg 22137ec681f3Smrg struct crocus_binding_table bt; 22147ec681f3Smrg memset(&bt, 0, sizeof(bt)); 22157ec681f3Smrg struct crocus_compiled_shader *shader = 22167ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_SF, sizeof(*key), key, program, 22177ec681f3Smrg program_size, 22187ec681f3Smrg (struct brw_stage_prog_data *)sf_prog_data, sizeof(*sf_prog_data), 22197ec681f3Smrg NULL, NULL, 0, 0, &bt); 22207ec681f3Smrg ralloc_free(mem_ctx); 22217ec681f3Smrg return shader; 22227ec681f3Smrg} 22237ec681f3Smrg 22247ec681f3Smrgstatic void 22257ec681f3Smrgcrocus_update_compiled_sf(struct crocus_context *ice) 22267ec681f3Smrg{ 22277ec681f3Smrg struct brw_sf_prog_key key; 22287ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.sf_prog; 22297ec681f3Smrg memset(&key, 0, sizeof(key)); 22307ec681f3Smrg 22317ec681f3Smrg key.attrs = ice->shaders.last_vue_map->slots_valid; 22327ec681f3Smrg 22337ec681f3Smrg switch (ice->state.reduced_prim_mode) { 22347ec681f3Smrg case GL_TRIANGLES: 22357ec681f3Smrg default: 22367ec681f3Smrg if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE)) 22377ec681f3Smrg key.primitive = BRW_SF_PRIM_UNFILLED_TRIS; 22387ec681f3Smrg else 22397ec681f3Smrg key.primitive = BRW_SF_PRIM_TRIANGLES; 22407ec681f3Smrg break; 22417ec681f3Smrg case GL_LINES: 22427ec681f3Smrg key.primitive = BRW_SF_PRIM_LINES; 22437ec681f3Smrg break; 22447ec681f3Smrg case GL_POINTS: 22457ec681f3Smrg key.primitive = BRW_SF_PRIM_POINTS; 22467ec681f3Smrg break; 22477ec681f3Smrg } 22487ec681f3Smrg 22497ec681f3Smrg struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice); 22507ec681f3Smrg key.userclip_active = rs_state->clip_plane_enable != 0; 22517ec681f3Smrg const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data); 22527ec681f3Smrg if (wm_prog_data) { 22537ec681f3Smrg key.contains_flat_varying = wm_prog_data->contains_flat_varying; 22547ec681f3Smrg memcpy(key.interp_mode, wm_prog_data->interp_mode, sizeof(key.interp_mode)); 22557ec681f3Smrg } 22567ec681f3Smrg 22577ec681f3Smrg key.do_twoside_color = rs_state->light_twoside; 22587ec681f3Smrg 22597ec681f3Smrg key.do_point_sprite = rs_state->point_quad_rasterization; 22607ec681f3Smrg if (key.do_point_sprite) { 22617ec681f3Smrg key.point_sprite_coord_replace = rs_state->sprite_coord_enable & 0xff; 22627ec681f3Smrg if (rs_state->sprite_coord_enable & (1 << 8)) 22637ec681f3Smrg key.do_point_coord = 1; 22647ec681f3Smrg if (wm_prog_data && wm_prog_data->urb_setup[VARYING_SLOT_PNTC] != -1) 22657ec681f3Smrg key.do_point_coord = 1; 22667ec681f3Smrg } 22677ec681f3Smrg 22687ec681f3Smrg key.sprite_origin_lower_left = rs_state->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT; 22697ec681f3Smrg 22707ec681f3Smrg if (key.do_twoside_color) { 22717ec681f3Smrg key.frontface_ccw = rs_state->front_ccw; 22727ec681f3Smrg } 22737ec681f3Smrg struct crocus_compiled_shader *shader = 22747ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_SF, sizeof(key), &key); 22757ec681f3Smrg 22767ec681f3Smrg if (!shader) 22777ec681f3Smrg shader = crocus_compile_sf(ice, &key); 22787ec681f3Smrg 22797ec681f3Smrg if (old != shader) { 22807ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_RASTER; 22817ec681f3Smrg ice->shaders.sf_prog = shader; 22827ec681f3Smrg } 22837ec681f3Smrg} 22847ec681f3Smrg 22857ec681f3Smrgstatic struct crocus_compiled_shader * 22867ec681f3Smrgcrocus_compile_ff_gs(struct crocus_context *ice, struct brw_ff_gs_prog_key *key) 22877ec681f3Smrg{ 22887ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 22897ec681f3Smrg struct brw_compiler *compiler = screen->compiler; 22907ec681f3Smrg void *mem_ctx; 22917ec681f3Smrg unsigned program_size; 22927ec681f3Smrg mem_ctx = ralloc_context(NULL); 22937ec681f3Smrg 22947ec681f3Smrg struct brw_ff_gs_prog_data *ff_gs_prog_data = 22957ec681f3Smrg rzalloc(mem_ctx, struct brw_ff_gs_prog_data); 22967ec681f3Smrg 22977ec681f3Smrg const unsigned *program = brw_compile_ff_gs_prog(compiler, mem_ctx, key, ff_gs_prog_data, 22987ec681f3Smrg ice->shaders.last_vue_map, &program_size); 22997ec681f3Smrg 23007ec681f3Smrg if (program == NULL) { 23017ec681f3Smrg dbg_printf("failed to compile sf shader\n"); 23027ec681f3Smrg ralloc_free(mem_ctx); 23037ec681f3Smrg return false; 23047ec681f3Smrg } 23057ec681f3Smrg 23067ec681f3Smrg struct crocus_binding_table bt; 23077ec681f3Smrg memset(&bt, 0, sizeof(bt)); 23087ec681f3Smrg 23097ec681f3Smrg if (screen->devinfo.ver == 6) { 23107ec681f3Smrg bt.sizes[CROCUS_SURFACE_GROUP_SOL] = BRW_MAX_SOL_BINDINGS; 23117ec681f3Smrg bt.used_mask[CROCUS_SURFACE_GROUP_SOL] = (uint64_t)-1; 23127ec681f3Smrg 23137ec681f3Smrg bt.size_bytes = BRW_MAX_SOL_BINDINGS * 4; 23147ec681f3Smrg } 23157ec681f3Smrg 23167ec681f3Smrg struct crocus_compiled_shader *shader = 23177ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_FF_GS, sizeof(*key), key, program, 23187ec681f3Smrg program_size, 23197ec681f3Smrg (struct brw_stage_prog_data *)ff_gs_prog_data, sizeof(*ff_gs_prog_data), 23207ec681f3Smrg NULL, NULL, 0, 0, &bt); 23217ec681f3Smrg ralloc_free(mem_ctx); 23227ec681f3Smrg return shader; 23237ec681f3Smrg} 23247ec681f3Smrg 23257ec681f3Smrgstatic void 23267ec681f3Smrgcrocus_update_compiled_ff_gs(struct crocus_context *ice) 23277ec681f3Smrg{ 23287ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 23297ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 23307ec681f3Smrg struct brw_ff_gs_prog_key key; 23317ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.ff_gs_prog; 23327ec681f3Smrg memset(&key, 0, sizeof(key)); 23337ec681f3Smrg 23347ec681f3Smrg assert(devinfo->ver < 7); 23357ec681f3Smrg 23367ec681f3Smrg key.attrs = ice->shaders.last_vue_map->slots_valid; 23377ec681f3Smrg 23387ec681f3Smrg key.primitive = screen->vtbl.translate_prim_type(ice->state.prim_mode, 0); 23397ec681f3Smrg 23407ec681f3Smrg struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice); 23417ec681f3Smrg key.pv_first = rs_state->flatshade_first; 23427ec681f3Smrg 23437ec681f3Smrg if (key.primitive == _3DPRIM_QUADLIST && !rs_state->flatshade) { 23447ec681f3Smrg /* Provide consistenbbbbbt primitive order with brw_set_prim's 23457ec681f3Smrg * optimization of single quads to trifans. 23467ec681f3Smrg */ 23477ec681f3Smrg key.pv_first = true; 23487ec681f3Smrg } 23497ec681f3Smrg 23507ec681f3Smrg if (devinfo->ver >= 6) { 23517ec681f3Smrg key.need_gs_prog = ice->state.streamout_active; 23527ec681f3Smrg if (key.need_gs_prog) { 23537ec681f3Smrg struct crocus_uncompiled_shader *vs = 23547ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_VERTEX]; 23557ec681f3Smrg gfx6_ff_gs_xfb_setup(&vs->stream_output, 23567ec681f3Smrg &key); 23577ec681f3Smrg } 23587ec681f3Smrg } else { 23597ec681f3Smrg key.need_gs_prog = (key.primitive == _3DPRIM_QUADLIST || 23607ec681f3Smrg key.primitive == _3DPRIM_QUADSTRIP || 23617ec681f3Smrg key.primitive == _3DPRIM_LINELOOP); 23627ec681f3Smrg } 23637ec681f3Smrg 23647ec681f3Smrg struct crocus_compiled_shader *shader = NULL; 23657ec681f3Smrg if (key.need_gs_prog) { 23667ec681f3Smrg shader = crocus_find_cached_shader(ice, CROCUS_CACHE_FF_GS, 23677ec681f3Smrg sizeof(key), &key); 23687ec681f3Smrg if (!shader) 23697ec681f3Smrg shader = crocus_compile_ff_gs(ice, &key); 23707ec681f3Smrg } 23717ec681f3Smrg if (old != shader) { 23727ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS; 23737ec681f3Smrg if (!!old != !!shader) 23747ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN6_URB; 23757ec681f3Smrg ice->shaders.ff_gs_prog = shader; 23767ec681f3Smrg if (shader) { 23777ec681f3Smrg const struct brw_ff_gs_prog_data *gs_prog_data = (struct brw_ff_gs_prog_data *)ice->shaders.ff_gs_prog->prog_data; 23787ec681f3Smrg ice->state.last_xfb_verts_per_prim = gs_prog_data->svbi_postincrement_value; 23797ec681f3Smrg } 23807ec681f3Smrg } 23817ec681f3Smrg} 23827ec681f3Smrg 23837ec681f3Smrg// XXX: crocus_compiled_shaders are space-leaking :( 23847ec681f3Smrg// XXX: do remember to unbind them if deleting them. 23857ec681f3Smrg 23867ec681f3Smrg/** 23877ec681f3Smrg * Update the current shader variants for the given state. 23887ec681f3Smrg * 23897ec681f3Smrg * This should be called on every draw call to ensure that the correct 23907ec681f3Smrg * shaders are bound. It will also flag any dirty state triggered by 23917ec681f3Smrg * swapping out those shaders. 23927ec681f3Smrg */ 23937ec681f3Smrgbool 23947ec681f3Smrgcrocus_update_compiled_shaders(struct crocus_context *ice) 23957ec681f3Smrg{ 23967ec681f3Smrg struct crocus_screen *screen = (void *) ice->ctx.screen; 23977ec681f3Smrg const uint64_t stage_dirty = ice->state.stage_dirty; 23987ec681f3Smrg 23997ec681f3Smrg struct brw_vue_prog_data *old_prog_datas[4]; 24007ec681f3Smrg if (!(ice->state.dirty & CROCUS_DIRTY_GEN6_URB)) { 24017ec681f3Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) 24027ec681f3Smrg old_prog_datas[i] = get_vue_prog_data(ice, i); 24037ec681f3Smrg } 24047ec681f3Smrg 24057ec681f3Smrg if (stage_dirty & (CROCUS_STAGE_DIRTY_UNCOMPILED_TCS | 24067ec681f3Smrg CROCUS_STAGE_DIRTY_UNCOMPILED_TES)) { 24077ec681f3Smrg struct crocus_uncompiled_shader *tes = 24087ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; 24097ec681f3Smrg if (tes) { 24107ec681f3Smrg crocus_update_compiled_tcs(ice); 24117ec681f3Smrg crocus_update_compiled_tes(ice); 24127ec681f3Smrg } else { 24137ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_TCS] = NULL; 24147ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_TES] = NULL; 24157ec681f3Smrg ice->state.stage_dirty |= 24167ec681f3Smrg CROCUS_STAGE_DIRTY_TCS | CROCUS_STAGE_DIRTY_TES | 24177ec681f3Smrg CROCUS_STAGE_DIRTY_BINDINGS_TCS | CROCUS_STAGE_DIRTY_BINDINGS_TES | 24187ec681f3Smrg CROCUS_STAGE_DIRTY_CONSTANTS_TCS | CROCUS_STAGE_DIRTY_CONSTANTS_TES; 24197ec681f3Smrg } 24207ec681f3Smrg } 24217ec681f3Smrg 24227ec681f3Smrg if (stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_VS) 24237ec681f3Smrg crocus_update_compiled_vs(ice); 24247ec681f3Smrg if (stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_GS) 24257ec681f3Smrg crocus_update_compiled_gs(ice); 24267ec681f3Smrg 24277ec681f3Smrg if (stage_dirty & (CROCUS_STAGE_DIRTY_UNCOMPILED_GS | 24287ec681f3Smrg CROCUS_STAGE_DIRTY_UNCOMPILED_TES)) { 24297ec681f3Smrg const struct crocus_compiled_shader *gs = 24307ec681f3Smrg ice->shaders.prog[MESA_SHADER_GEOMETRY]; 24317ec681f3Smrg const struct crocus_compiled_shader *tes = 24327ec681f3Smrg ice->shaders.prog[MESA_SHADER_TESS_EVAL]; 24337ec681f3Smrg 24347ec681f3Smrg bool points_or_lines = false; 24357ec681f3Smrg 24367ec681f3Smrg if (gs) { 24377ec681f3Smrg const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data; 24387ec681f3Smrg points_or_lines = 24397ec681f3Smrg gs_prog_data->output_topology == _3DPRIM_POINTLIST || 24407ec681f3Smrg gs_prog_data->output_topology == _3DPRIM_LINESTRIP; 24417ec681f3Smrg } else if (tes) { 24427ec681f3Smrg const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data; 24437ec681f3Smrg points_or_lines = 24447ec681f3Smrg tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE || 24457ec681f3Smrg tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT; 24467ec681f3Smrg } 24477ec681f3Smrg 24487ec681f3Smrg if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) { 24497ec681f3Smrg /* Outbound to XY Clip enables */ 24507ec681f3Smrg ice->shaders.output_topology_is_points_or_lines = points_or_lines; 24517ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_CLIP; 24527ec681f3Smrg } 24537ec681f3Smrg } 24547ec681f3Smrg 24557ec681f3Smrg if (!ice->shaders.prog[MESA_SHADER_VERTEX]) 24567ec681f3Smrg return false; 24577ec681f3Smrg 24587ec681f3Smrg gl_shader_stage last_stage = last_vue_stage(ice); 24597ec681f3Smrg struct crocus_compiled_shader *shader = ice->shaders.prog[last_stage]; 24607ec681f3Smrg struct crocus_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage]; 24617ec681f3Smrg update_last_vue_map(ice, shader->prog_data); 24627ec681f3Smrg if (ice->state.streamout != shader->streamout) { 24637ec681f3Smrg ice->state.streamout = shader->streamout; 24647ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST | CROCUS_DIRTY_STREAMOUT; 24657ec681f3Smrg } 24667ec681f3Smrg 24677ec681f3Smrg if (ice->state.streamout_active) { 24687ec681f3Smrg screen->vtbl.update_so_strides(ice, ish->stream_output.stride); 24697ec681f3Smrg } 24707ec681f3Smrg 24717ec681f3Smrg /* use ice->state version as last_vue_map can dirty this bit */ 24727ec681f3Smrg if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_FS) 24737ec681f3Smrg crocus_update_compiled_fs(ice); 24747ec681f3Smrg 24757ec681f3Smrg if (screen->devinfo.ver <= 6) { 24767ec681f3Smrg if (ice->state.dirty & CROCUS_DIRTY_GEN4_FF_GS_PROG && 24777ec681f3Smrg !ice->shaders.prog[MESA_SHADER_GEOMETRY]) 24787ec681f3Smrg crocus_update_compiled_ff_gs(ice); 24797ec681f3Smrg } 24807ec681f3Smrg 24817ec681f3Smrg if (screen->devinfo.ver < 6) { 24827ec681f3Smrg if (ice->state.dirty & CROCUS_DIRTY_GEN4_CLIP_PROG) 24837ec681f3Smrg crocus_update_compiled_clip(ice); 24847ec681f3Smrg if (ice->state.dirty & CROCUS_DIRTY_GEN4_SF_PROG) 24857ec681f3Smrg crocus_update_compiled_sf(ice); 24867ec681f3Smrg } 24877ec681f3Smrg 24887ec681f3Smrg 24897ec681f3Smrg /* Changing shader interfaces may require a URB configuration. */ 24907ec681f3Smrg if (!(ice->state.dirty & CROCUS_DIRTY_GEN6_URB)) { 24917ec681f3Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 24927ec681f3Smrg struct brw_vue_prog_data *old = old_prog_datas[i]; 24937ec681f3Smrg struct brw_vue_prog_data *new = get_vue_prog_data(ice, i); 24947ec681f3Smrg if (!!old != !!new || 24957ec681f3Smrg (new && new->urb_entry_size != old->urb_entry_size)) { 24967ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN6_URB; 24977ec681f3Smrg break; 24987ec681f3Smrg } 24997ec681f3Smrg } 25007ec681f3Smrg } 25017ec681f3Smrg 25027ec681f3Smrg if (ice->state.stage_dirty & CROCUS_RENDER_STAGE_DIRTY_CONSTANTS) { 25037ec681f3Smrg for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { 25047ec681f3Smrg if (ice->state.stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS << i)) 25057ec681f3Smrg crocus_update_pull_constant_descriptors(ice, i); 25067ec681f3Smrg } 25077ec681f3Smrg } 25087ec681f3Smrg return true; 25097ec681f3Smrg} 25107ec681f3Smrg 25117ec681f3Smrgstatic struct crocus_compiled_shader * 25127ec681f3Smrgcrocus_compile_cs(struct crocus_context *ice, 25137ec681f3Smrg struct crocus_uncompiled_shader *ish, 25147ec681f3Smrg const struct brw_cs_prog_key *key) 25157ec681f3Smrg{ 25167ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 25177ec681f3Smrg const struct brw_compiler *compiler = screen->compiler; 25187ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 25197ec681f3Smrg struct brw_cs_prog_data *cs_prog_data = 25207ec681f3Smrg rzalloc(mem_ctx, struct brw_cs_prog_data); 25217ec681f3Smrg struct brw_stage_prog_data *prog_data = &cs_prog_data->base; 25227ec681f3Smrg enum brw_param_builtin *system_values; 25237ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 25247ec681f3Smrg unsigned num_system_values; 25257ec681f3Smrg unsigned num_cbufs; 25267ec681f3Smrg 25277ec681f3Smrg nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); 25287ec681f3Smrg 25297ec681f3Smrg NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics); 25307ec681f3Smrg 25317ec681f3Smrg crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, 25327ec681f3Smrg &num_system_values, &num_cbufs); 25337ec681f3Smrg crocus_lower_swizzles(nir, &key->base.tex); 25347ec681f3Smrg struct crocus_binding_table bt; 25357ec681f3Smrg crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, 25367ec681f3Smrg num_system_values, num_cbufs, &key->base.tex); 25377ec681f3Smrg 25387ec681f3Smrg struct brw_compile_cs_params params = { 25397ec681f3Smrg .nir = nir, 25407ec681f3Smrg .key = key, 25417ec681f3Smrg .prog_data = cs_prog_data, 25427ec681f3Smrg .log_data = &ice->dbg, 25437ec681f3Smrg }; 25447ec681f3Smrg 25457ec681f3Smrg const unsigned *program = 25467ec681f3Smrg brw_compile_cs(compiler, mem_ctx, ¶ms); 25477ec681f3Smrg if (program == NULL) { 25487ec681f3Smrg dbg_printf("Failed to compile compute shader: %s\n", params.error_str); 25497ec681f3Smrg ralloc_free(mem_ctx); 25507ec681f3Smrg return false; 25517ec681f3Smrg } 25527ec681f3Smrg 25537ec681f3Smrg if (ish->compiled_once) { 25547ec681f3Smrg crocus_debug_recompile(ice, &nir->info, &key->base); 25557ec681f3Smrg } else { 25567ec681f3Smrg ish->compiled_once = true; 25577ec681f3Smrg } 25587ec681f3Smrg 25597ec681f3Smrg struct crocus_compiled_shader *shader = 25607ec681f3Smrg crocus_upload_shader(ice, CROCUS_CACHE_CS, sizeof(*key), key, program, 25617ec681f3Smrg prog_data->program_size, 25627ec681f3Smrg prog_data, sizeof(*cs_prog_data), NULL, 25637ec681f3Smrg system_values, num_system_values, 25647ec681f3Smrg num_cbufs, &bt); 25657ec681f3Smrg 25667ec681f3Smrg crocus_disk_cache_store(screen->disk_cache, ish, shader, 25677ec681f3Smrg ice->shaders.cache_bo_map, 25687ec681f3Smrg key, sizeof(*key)); 25697ec681f3Smrg 25707ec681f3Smrg ralloc_free(mem_ctx); 25717ec681f3Smrg return shader; 25727ec681f3Smrg} 25737ec681f3Smrg 25747ec681f3Smrgstatic void 25757ec681f3Smrgcrocus_update_compiled_cs(struct crocus_context *ice) 25767ec681f3Smrg{ 25777ec681f3Smrg struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE]; 25787ec681f3Smrg struct crocus_uncompiled_shader *ish = 25797ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_COMPUTE]; 25807ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 25817ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 25827ec681f3Smrg struct brw_cs_prog_key key = { KEY_INIT() }; 25837ec681f3Smrg 25847ec681f3Smrg if (ish->nos & (1ull << CROCUS_NOS_TEXTURES)) 25857ec681f3Smrg crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_COMPUTE, ish, 25867ec681f3Smrg ish->nir->info.uses_texture_gather, &key.base.tex); 25877ec681f3Smrg screen->vtbl.populate_cs_key(ice, &key); 25887ec681f3Smrg 25897ec681f3Smrg struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_CS]; 25907ec681f3Smrg struct crocus_compiled_shader *shader = 25917ec681f3Smrg crocus_find_cached_shader(ice, CROCUS_CACHE_CS, sizeof(key), &key); 25927ec681f3Smrg 25937ec681f3Smrg if (!shader) 25947ec681f3Smrg shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)); 25957ec681f3Smrg 25967ec681f3Smrg if (!shader) 25977ec681f3Smrg shader = crocus_compile_cs(ice, ish, &key); 25987ec681f3Smrg 25997ec681f3Smrg if (old != shader) { 26007ec681f3Smrg ice->shaders.prog[CROCUS_CACHE_CS] = shader; 26017ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CS | 26027ec681f3Smrg CROCUS_STAGE_DIRTY_BINDINGS_CS | 26037ec681f3Smrg CROCUS_STAGE_DIRTY_CONSTANTS_CS; 26047ec681f3Smrg shs->sysvals_need_upload = true; 26057ec681f3Smrg } 26067ec681f3Smrg} 26077ec681f3Smrg 26087ec681f3Smrgvoid 26097ec681f3Smrgcrocus_update_compiled_compute_shader(struct crocus_context *ice) 26107ec681f3Smrg{ 26117ec681f3Smrg if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_CS) 26127ec681f3Smrg crocus_update_compiled_cs(ice); 26137ec681f3Smrg 26147ec681f3Smrg if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_CONSTANTS_CS) 26157ec681f3Smrg crocus_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE); 26167ec681f3Smrg} 26177ec681f3Smrg 26187ec681f3Smrgvoid 26197ec681f3Smrgcrocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data, 26207ec681f3Smrg unsigned threads, 26217ec681f3Smrg uint32_t *dst) 26227ec681f3Smrg{ 26237ec681f3Smrg assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0); 26247ec681f3Smrg assert(cs_prog_data->push.cross_thread.size == 0); 26257ec681f3Smrg assert(cs_prog_data->push.per_thread.dwords == 1); 26267ec681f3Smrg assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); 26277ec681f3Smrg for (unsigned t = 0; t < threads; t++) 26287ec681f3Smrg dst[8 * t] = t; 26297ec681f3Smrg} 26307ec681f3Smrg 26317ec681f3Smrg/** 26327ec681f3Smrg * Allocate scratch BOs as needed for the given per-thread size and stage. 26337ec681f3Smrg */ 26347ec681f3Smrgstruct crocus_bo * 26357ec681f3Smrgcrocus_get_scratch_space(struct crocus_context *ice, 26367ec681f3Smrg unsigned per_thread_scratch, 26377ec681f3Smrg gl_shader_stage stage) 26387ec681f3Smrg{ 26397ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 26407ec681f3Smrg struct crocus_bufmgr *bufmgr = screen->bufmgr; 26417ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 26427ec681f3Smrg 26437ec681f3Smrg unsigned encoded_size = ffs(per_thread_scratch) - 11; 26447ec681f3Smrg assert(encoded_size < (1 << 16)); 26457ec681f3Smrg 26467ec681f3Smrg struct crocus_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage]; 26477ec681f3Smrg 26487ec681f3Smrg if (!*bop) { 26497ec681f3Smrg assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids)); 26507ec681f3Smrg uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage]; 26517ec681f3Smrg *bop = crocus_bo_alloc(bufmgr, "scratch", size); 26527ec681f3Smrg } 26537ec681f3Smrg 26547ec681f3Smrg return *bop; 26557ec681f3Smrg} 26567ec681f3Smrg 26577ec681f3Smrg/* ------------------------------------------------------------------- */ 26587ec681f3Smrg 26597ec681f3Smrg/** 26607ec681f3Smrg * The pipe->create_[stage]_state() driver hooks. 26617ec681f3Smrg * 26627ec681f3Smrg * Performs basic NIR preprocessing, records any state dependencies, and 26637ec681f3Smrg * returns an crocus_uncompiled_shader as the Gallium CSO. 26647ec681f3Smrg * 26657ec681f3Smrg * Actual shader compilation to assembly happens later, at first use. 26667ec681f3Smrg */ 26677ec681f3Smrgstatic void * 26687ec681f3Smrgcrocus_create_uncompiled_shader(struct pipe_context *ctx, 26697ec681f3Smrg nir_shader *nir, 26707ec681f3Smrg const struct pipe_stream_output_info *so_info) 26717ec681f3Smrg{ 26727ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 26737ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 26747ec681f3Smrg struct crocus_uncompiled_shader *ish = 26757ec681f3Smrg calloc(1, sizeof(struct crocus_uncompiled_shader)); 26767ec681f3Smrg if (!ish) 26777ec681f3Smrg return NULL; 26787ec681f3Smrg 26797ec681f3Smrg if (devinfo->ver >= 6) 26807ec681f3Smrg NIR_PASS(ish->needs_edge_flag, nir, crocus_fix_edge_flags); 26817ec681f3Smrg else 26827ec681f3Smrg ish->needs_edge_flag = false; 26837ec681f3Smrg 26847ec681f3Smrg brw_preprocess_nir(screen->compiler, nir, NULL); 26857ec681f3Smrg 26867ec681f3Smrg NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo); 26877ec681f3Smrg NIR_PASS_V(nir, crocus_lower_storage_image_derefs); 26887ec681f3Smrg 26897ec681f3Smrg nir_sweep(nir); 26907ec681f3Smrg 26917ec681f3Smrg ish->program_id = get_new_program_id(screen); 26927ec681f3Smrg ish->nir = nir; 26937ec681f3Smrg if (so_info) { 26947ec681f3Smrg memcpy(&ish->stream_output, so_info, sizeof(*so_info)); 26957ec681f3Smrg update_so_info(&ish->stream_output, nir->info.outputs_written); 26967ec681f3Smrg } 26977ec681f3Smrg 26987ec681f3Smrg if (screen->disk_cache) { 26997ec681f3Smrg /* Serialize the NIR to a binary blob that we can hash for the disk 27007ec681f3Smrg * cache. Drop unnecessary information (like variable names) 27017ec681f3Smrg * so the serialized NIR is smaller, and also to let us detect more 27027ec681f3Smrg * isomorphic shaders when hashing, increasing cache hits. 27037ec681f3Smrg */ 27047ec681f3Smrg struct blob blob; 27057ec681f3Smrg blob_init(&blob); 27067ec681f3Smrg nir_serialize(&blob, nir, true); 27077ec681f3Smrg _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1); 27087ec681f3Smrg blob_finish(&blob); 27097ec681f3Smrg } 27107ec681f3Smrg 27117ec681f3Smrg return ish; 27127ec681f3Smrg} 27137ec681f3Smrg 27147ec681f3Smrgstatic struct crocus_uncompiled_shader * 27157ec681f3Smrgcrocus_create_shader_state(struct pipe_context *ctx, 27167ec681f3Smrg const struct pipe_shader_state *state) 27177ec681f3Smrg{ 27187ec681f3Smrg struct nir_shader *nir; 27197ec681f3Smrg 27207ec681f3Smrg if (state->type == PIPE_SHADER_IR_TGSI) 27217ec681f3Smrg nir = tgsi_to_nir(state->tokens, ctx->screen, false); 27227ec681f3Smrg else 27237ec681f3Smrg nir = state->ir.nir; 27247ec681f3Smrg 27257ec681f3Smrg return crocus_create_uncompiled_shader(ctx, nir, &state->stream_output); 27267ec681f3Smrg} 27277ec681f3Smrg 27287ec681f3Smrgstatic void * 27297ec681f3Smrgcrocus_create_vs_state(struct pipe_context *ctx, 27307ec681f3Smrg const struct pipe_shader_state *state) 27317ec681f3Smrg{ 27327ec681f3Smrg struct crocus_context *ice = (void *) ctx; 27337ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 27347ec681f3Smrg struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state); 27357ec681f3Smrg 27367ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_TEXTURES); 27377ec681f3Smrg /* User clip planes or gen5 sprite coord enable */ 27387ec681f3Smrg if (ish->nir->info.clip_distance_array_size == 0 || 27397ec681f3Smrg screen->devinfo.ver <= 5) 27407ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_RASTERIZER); 27417ec681f3Smrg 27427ec681f3Smrg if (screen->devinfo.verx10 < 75) 27437ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_VERTEX_ELEMENTS); 27447ec681f3Smrg 27457ec681f3Smrg if (screen->precompile) { 27467ec681f3Smrg struct brw_vs_prog_key key = { KEY_INIT() }; 27477ec681f3Smrg 27487ec681f3Smrg if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key))) 27497ec681f3Smrg crocus_compile_vs(ice, ish, &key); 27507ec681f3Smrg } 27517ec681f3Smrg 27527ec681f3Smrg return ish; 27537ec681f3Smrg} 27547ec681f3Smrg 27557ec681f3Smrgstatic void * 27567ec681f3Smrgcrocus_create_tcs_state(struct pipe_context *ctx, 27577ec681f3Smrg const struct pipe_shader_state *state) 27587ec681f3Smrg{ 27597ec681f3Smrg struct crocus_context *ice = (void *) ctx; 27607ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 27617ec681f3Smrg struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state); 27627ec681f3Smrg struct shader_info *info = &ish->nir->info; 27637ec681f3Smrg 27647ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_TEXTURES); 27657ec681f3Smrg if (screen->precompile) { 27667ec681f3Smrg const unsigned _GL_TRIANGLES = 0x0004; 27677ec681f3Smrg struct brw_tcs_prog_key key = { 27687ec681f3Smrg KEY_INIT(), 27697ec681f3Smrg // XXX: make sure the linker fills this out from the TES... 27707ec681f3Smrg .tes_primitive_mode = 27717ec681f3Smrg info->tess.primitive_mode ? info->tess.primitive_mode 27727ec681f3Smrg : _GL_TRIANGLES, 27737ec681f3Smrg .outputs_written = info->outputs_written, 27747ec681f3Smrg .patch_outputs_written = info->patch_outputs_written, 27757ec681f3Smrg }; 27767ec681f3Smrg 27777ec681f3Smrg key.input_vertices = info->tess.tcs_vertices_out; 27787ec681f3Smrg 27797ec681f3Smrg if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key))) 27807ec681f3Smrg crocus_compile_tcs(ice, ish, &key); 27817ec681f3Smrg } 27827ec681f3Smrg 27837ec681f3Smrg return ish; 27847ec681f3Smrg} 27857ec681f3Smrg 27867ec681f3Smrgstatic void * 27877ec681f3Smrgcrocus_create_tes_state(struct pipe_context *ctx, 27887ec681f3Smrg const struct pipe_shader_state *state) 27897ec681f3Smrg{ 27907ec681f3Smrg struct crocus_context *ice = (void *) ctx; 27917ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 27927ec681f3Smrg struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state); 27937ec681f3Smrg struct shader_info *info = &ish->nir->info; 27947ec681f3Smrg 27957ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_TEXTURES); 27967ec681f3Smrg /* User clip planes */ 27977ec681f3Smrg if (ish->nir->info.clip_distance_array_size == 0) 27987ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_RASTERIZER); 27997ec681f3Smrg 28007ec681f3Smrg if (screen->precompile) { 28017ec681f3Smrg struct brw_tes_prog_key key = { 28027ec681f3Smrg KEY_INIT(), 28037ec681f3Smrg // XXX: not ideal, need TCS output/TES input unification 28047ec681f3Smrg .inputs_read = info->inputs_read, 28057ec681f3Smrg .patch_inputs_read = info->patch_inputs_read, 28067ec681f3Smrg }; 28077ec681f3Smrg 28087ec681f3Smrg if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key))) 28097ec681f3Smrg crocus_compile_tes(ice, ish, &key); 28107ec681f3Smrg } 28117ec681f3Smrg 28127ec681f3Smrg return ish; 28137ec681f3Smrg} 28147ec681f3Smrg 28157ec681f3Smrgstatic void * 28167ec681f3Smrgcrocus_create_gs_state(struct pipe_context *ctx, 28177ec681f3Smrg const struct pipe_shader_state *state) 28187ec681f3Smrg{ 28197ec681f3Smrg struct crocus_context *ice = (void *) ctx; 28207ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 28217ec681f3Smrg struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state); 28227ec681f3Smrg 28237ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_TEXTURES); 28247ec681f3Smrg /* User clip planes */ 28257ec681f3Smrg if (ish->nir->info.clip_distance_array_size == 0) 28267ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_RASTERIZER); 28277ec681f3Smrg 28287ec681f3Smrg if (screen->precompile) { 28297ec681f3Smrg struct brw_gs_prog_key key = { KEY_INIT() }; 28307ec681f3Smrg 28317ec681f3Smrg if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key))) 28327ec681f3Smrg crocus_compile_gs(ice, ish, &key); 28337ec681f3Smrg } 28347ec681f3Smrg 28357ec681f3Smrg return ish; 28367ec681f3Smrg} 28377ec681f3Smrg 28387ec681f3Smrgstatic void * 28397ec681f3Smrgcrocus_create_fs_state(struct pipe_context *ctx, 28407ec681f3Smrg const struct pipe_shader_state *state) 28417ec681f3Smrg{ 28427ec681f3Smrg struct crocus_context *ice = (void *) ctx; 28437ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 28447ec681f3Smrg struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state); 28457ec681f3Smrg struct shader_info *info = &ish->nir->info; 28467ec681f3Smrg 28477ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_FRAMEBUFFER) | 28487ec681f3Smrg (1ull << CROCUS_NOS_DEPTH_STENCIL_ALPHA) | 28497ec681f3Smrg (1ull << CROCUS_NOS_RASTERIZER) | 28507ec681f3Smrg (1ull << CROCUS_NOS_TEXTURES) | 28517ec681f3Smrg (1ull << CROCUS_NOS_BLEND); 28527ec681f3Smrg 28537ec681f3Smrg /* The program key needs the VUE map if there are > 16 inputs or gen4/5 */ 28547ec681f3Smrg if (screen->devinfo.ver < 6 || util_bitcount64(ish->nir->info.inputs_read & 28557ec681f3Smrg BRW_FS_VARYING_INPUT_MASK) > 16) { 28567ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_LAST_VUE_MAP); 28577ec681f3Smrg } 28587ec681f3Smrg 28597ec681f3Smrg if (screen->precompile) { 28607ec681f3Smrg const uint64_t color_outputs = info->outputs_written & 28617ec681f3Smrg ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | 28627ec681f3Smrg BITFIELD64_BIT(FRAG_RESULT_STENCIL) | 28637ec681f3Smrg BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)); 28647ec681f3Smrg 28657ec681f3Smrg bool can_rearrange_varyings = 28667ec681f3Smrg screen->devinfo.ver > 6 && util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16; 28677ec681f3Smrg 28687ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 28697ec681f3Smrg struct brw_wm_prog_key key = { 28707ec681f3Smrg KEY_INIT(), 28717ec681f3Smrg .nr_color_regions = util_bitcount(color_outputs), 28727ec681f3Smrg .coherent_fb_fetch = false, 28737ec681f3Smrg .input_slots_valid = 28747ec681f3Smrg can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS, 28757ec681f3Smrg }; 28767ec681f3Smrg 28777ec681f3Smrg struct brw_vue_map vue_map; 28787ec681f3Smrg if (devinfo->ver < 6) { 28797ec681f3Smrg brw_compute_vue_map(devinfo, &vue_map, 28807ec681f3Smrg info->inputs_read | VARYING_BIT_POS, 28817ec681f3Smrg false, /* pos slots */ 1); 28827ec681f3Smrg } 28837ec681f3Smrg if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key))) 28847ec681f3Smrg crocus_compile_fs(ice, ish, &key, &vue_map); 28857ec681f3Smrg } 28867ec681f3Smrg 28877ec681f3Smrg return ish; 28887ec681f3Smrg} 28897ec681f3Smrg 28907ec681f3Smrgstatic void * 28917ec681f3Smrgcrocus_create_compute_state(struct pipe_context *ctx, 28927ec681f3Smrg const struct pipe_compute_state *state) 28937ec681f3Smrg{ 28947ec681f3Smrg assert(state->ir_type == PIPE_SHADER_IR_NIR); 28957ec681f3Smrg 28967ec681f3Smrg struct crocus_context *ice = (void *) ctx; 28977ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 28987ec681f3Smrg struct crocus_uncompiled_shader *ish = 28997ec681f3Smrg crocus_create_uncompiled_shader(ctx, (void *) state->prog, NULL); 29007ec681f3Smrg 29017ec681f3Smrg ish->nos |= (1ull << CROCUS_NOS_TEXTURES); 29027ec681f3Smrg // XXX: disallow more than 64KB of shared variables 29037ec681f3Smrg 29047ec681f3Smrg if (screen->precompile) { 29057ec681f3Smrg struct brw_cs_prog_key key = { KEY_INIT() }; 29067ec681f3Smrg 29077ec681f3Smrg if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key))) 29087ec681f3Smrg crocus_compile_cs(ice, ish, &key); 29097ec681f3Smrg } 29107ec681f3Smrg 29117ec681f3Smrg return ish; 29127ec681f3Smrg} 29137ec681f3Smrg 29147ec681f3Smrg/** 29157ec681f3Smrg * The pipe->delete_[stage]_state() driver hooks. 29167ec681f3Smrg * 29177ec681f3Smrg * Frees the crocus_uncompiled_shader. 29187ec681f3Smrg */ 29197ec681f3Smrgstatic void 29207ec681f3Smrgcrocus_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage) 29217ec681f3Smrg{ 29227ec681f3Smrg struct crocus_uncompiled_shader *ish = state; 29237ec681f3Smrg struct crocus_context *ice = (void *) ctx; 29247ec681f3Smrg 29257ec681f3Smrg if (ice->shaders.uncompiled[stage] == ish) { 29267ec681f3Smrg ice->shaders.uncompiled[stage] = NULL; 29277ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_VS << stage; 29287ec681f3Smrg } 29297ec681f3Smrg 29307ec681f3Smrg if (ish->const_data) { 29317ec681f3Smrg pipe_resource_reference(&ish->const_data, NULL); 29327ec681f3Smrg pipe_resource_reference(&ish->const_data_state.res, NULL); 29337ec681f3Smrg } 29347ec681f3Smrg 29357ec681f3Smrg ralloc_free(ish->nir); 29367ec681f3Smrg free(ish); 29377ec681f3Smrg} 29387ec681f3Smrg 29397ec681f3Smrgstatic void 29407ec681f3Smrgcrocus_delete_vs_state(struct pipe_context *ctx, void *state) 29417ec681f3Smrg{ 29427ec681f3Smrg crocus_delete_shader_state(ctx, state, MESA_SHADER_VERTEX); 29437ec681f3Smrg} 29447ec681f3Smrg 29457ec681f3Smrgstatic void 29467ec681f3Smrgcrocus_delete_tcs_state(struct pipe_context *ctx, void *state) 29477ec681f3Smrg{ 29487ec681f3Smrg crocus_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL); 29497ec681f3Smrg} 29507ec681f3Smrg 29517ec681f3Smrgstatic void 29527ec681f3Smrgcrocus_delete_tes_state(struct pipe_context *ctx, void *state) 29537ec681f3Smrg{ 29547ec681f3Smrg crocus_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL); 29557ec681f3Smrg} 29567ec681f3Smrg 29577ec681f3Smrgstatic void 29587ec681f3Smrgcrocus_delete_gs_state(struct pipe_context *ctx, void *state) 29597ec681f3Smrg{ 29607ec681f3Smrg crocus_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY); 29617ec681f3Smrg} 29627ec681f3Smrg 29637ec681f3Smrgstatic void 29647ec681f3Smrgcrocus_delete_fs_state(struct pipe_context *ctx, void *state) 29657ec681f3Smrg{ 29667ec681f3Smrg crocus_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT); 29677ec681f3Smrg} 29687ec681f3Smrg 29697ec681f3Smrgstatic void 29707ec681f3Smrgcrocus_delete_cs_state(struct pipe_context *ctx, void *state) 29717ec681f3Smrg{ 29727ec681f3Smrg crocus_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE); 29737ec681f3Smrg} 29747ec681f3Smrg 29757ec681f3Smrg/** 29767ec681f3Smrg * The pipe->bind_[stage]_state() driver hook. 29777ec681f3Smrg * 29787ec681f3Smrg * Binds an uncompiled shader as the current one for a particular stage. 29797ec681f3Smrg * Updates dirty tracking to account for the shader's NOS. 29807ec681f3Smrg */ 29817ec681f3Smrgstatic void 29827ec681f3Smrgbind_shader_state(struct crocus_context *ice, 29837ec681f3Smrg struct crocus_uncompiled_shader *ish, 29847ec681f3Smrg gl_shader_stage stage) 29857ec681f3Smrg{ 29867ec681f3Smrg uint64_t dirty_bit = CROCUS_STAGE_DIRTY_UNCOMPILED_VS << stage; 29877ec681f3Smrg const uint64_t nos = ish ? ish->nos : 0; 29887ec681f3Smrg 29897ec681f3Smrg const struct shader_info *old_info = crocus_get_shader_info(ice, stage); 29907ec681f3Smrg const struct shader_info *new_info = ish ? &ish->nir->info : NULL; 29917ec681f3Smrg 29927ec681f3Smrg if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) != 29937ec681f3Smrg (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) { 29947ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage; 29957ec681f3Smrg } 29967ec681f3Smrg 29977ec681f3Smrg ice->shaders.uncompiled[stage] = ish; 29987ec681f3Smrg ice->state.stage_dirty |= dirty_bit; 29997ec681f3Smrg 30007ec681f3Smrg /* Record that CSOs need to mark CROCUS_DIRTY_UNCOMPILED_XS when they change 30017ec681f3Smrg * (or that they no longer need to do so). 30027ec681f3Smrg */ 30037ec681f3Smrg for (int i = 0; i < CROCUS_NOS_COUNT; i++) { 30047ec681f3Smrg if (nos & (1 << i)) 30057ec681f3Smrg ice->state.stage_dirty_for_nos[i] |= dirty_bit; 30067ec681f3Smrg else 30077ec681f3Smrg ice->state.stage_dirty_for_nos[i] &= ~dirty_bit; 30087ec681f3Smrg } 30097ec681f3Smrg} 30107ec681f3Smrg 30117ec681f3Smrgstatic void 30127ec681f3Smrgcrocus_bind_vs_state(struct pipe_context *ctx, void *state) 30137ec681f3Smrg{ 30147ec681f3Smrg struct crocus_context *ice = (struct crocus_context *)ctx; 30157ec681f3Smrg struct crocus_uncompiled_shader *new_ish = state; 30167ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 30177ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 30187ec681f3Smrg 30197ec681f3Smrg if (new_ish && 30207ec681f3Smrg ice->state.window_space_position != 30217ec681f3Smrg new_ish->nir->info.vs.window_space_position) { 30227ec681f3Smrg ice->state.window_space_position = 30237ec681f3Smrg new_ish->nir->info.vs.window_space_position; 30247ec681f3Smrg 30257ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_CLIP | 30267ec681f3Smrg CROCUS_DIRTY_RASTER | 30277ec681f3Smrg CROCUS_DIRTY_CC_VIEWPORT; 30287ec681f3Smrg } 30297ec681f3Smrg 30307ec681f3Smrg if (devinfo->ver == 6) { 30317ec681f3Smrg ice->state.stage_dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG; 30327ec681f3Smrg } 30337ec681f3Smrg 30347ec681f3Smrg bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX); 30357ec681f3Smrg} 30367ec681f3Smrg 30377ec681f3Smrgstatic void 30387ec681f3Smrgcrocus_bind_tcs_state(struct pipe_context *ctx, void *state) 30397ec681f3Smrg{ 30407ec681f3Smrg bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL); 30417ec681f3Smrg} 30427ec681f3Smrg 30437ec681f3Smrgstatic void 30447ec681f3Smrgcrocus_bind_tes_state(struct pipe_context *ctx, void *state) 30457ec681f3Smrg{ 30467ec681f3Smrg struct crocus_context *ice = (struct crocus_context *)ctx; 30477ec681f3Smrg 30487ec681f3Smrg /* Enabling/disabling optional stages requires a URB reconfiguration. */ 30497ec681f3Smrg if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) 30507ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN6_URB; 30517ec681f3Smrg 30527ec681f3Smrg bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL); 30537ec681f3Smrg} 30547ec681f3Smrg 30557ec681f3Smrgstatic void 30567ec681f3Smrgcrocus_bind_gs_state(struct pipe_context *ctx, void *state) 30577ec681f3Smrg{ 30587ec681f3Smrg struct crocus_context *ice = (struct crocus_context *)ctx; 30597ec681f3Smrg 30607ec681f3Smrg /* Enabling/disabling optional stages requires a URB reconfiguration. */ 30617ec681f3Smrg if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) 30627ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN6_URB; 30637ec681f3Smrg 30647ec681f3Smrg bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY); 30657ec681f3Smrg} 30667ec681f3Smrg 30677ec681f3Smrgstatic void 30687ec681f3Smrgcrocus_bind_fs_state(struct pipe_context *ctx, void *state) 30697ec681f3Smrg{ 30707ec681f3Smrg struct crocus_context *ice = (struct crocus_context *) ctx; 30717ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *) ctx->screen; 30727ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 30737ec681f3Smrg struct crocus_uncompiled_shader *old_ish = 30747ec681f3Smrg ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; 30757ec681f3Smrg struct crocus_uncompiled_shader *new_ish = state; 30767ec681f3Smrg 30777ec681f3Smrg const unsigned color_bits = 30787ec681f3Smrg BITFIELD64_BIT(FRAG_RESULT_COLOR) | 30797ec681f3Smrg BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS); 30807ec681f3Smrg 30817ec681f3Smrg /* Fragment shader outputs influence HasWriteableRT */ 30827ec681f3Smrg if (!old_ish || !new_ish || 30837ec681f3Smrg (old_ish->nir->info.outputs_written & color_bits) != 30847ec681f3Smrg (new_ish->nir->info.outputs_written & color_bits)) { 30857ec681f3Smrg if (devinfo->ver == 8) 30867ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND; 30877ec681f3Smrg else 30887ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_WM; 30897ec681f3Smrg } 30907ec681f3Smrg 30917ec681f3Smrg if (devinfo->ver == 8) 30927ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX; 30937ec681f3Smrg bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT); 30947ec681f3Smrg} 30957ec681f3Smrg 30967ec681f3Smrgstatic void 30977ec681f3Smrgcrocus_bind_cs_state(struct pipe_context *ctx, void *state) 30987ec681f3Smrg{ 30997ec681f3Smrg bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE); 31007ec681f3Smrg} 31017ec681f3Smrg 31027ec681f3Smrgvoid 31037ec681f3Smrgcrocus_init_program_functions(struct pipe_context *ctx) 31047ec681f3Smrg{ 31057ec681f3Smrg ctx->create_vs_state = crocus_create_vs_state; 31067ec681f3Smrg ctx->create_tcs_state = crocus_create_tcs_state; 31077ec681f3Smrg ctx->create_tes_state = crocus_create_tes_state; 31087ec681f3Smrg ctx->create_gs_state = crocus_create_gs_state; 31097ec681f3Smrg ctx->create_fs_state = crocus_create_fs_state; 31107ec681f3Smrg ctx->create_compute_state = crocus_create_compute_state; 31117ec681f3Smrg 31127ec681f3Smrg ctx->delete_vs_state = crocus_delete_vs_state; 31137ec681f3Smrg ctx->delete_tcs_state = crocus_delete_tcs_state; 31147ec681f3Smrg ctx->delete_tes_state = crocus_delete_tes_state; 31157ec681f3Smrg ctx->delete_gs_state = crocus_delete_gs_state; 31167ec681f3Smrg ctx->delete_fs_state = crocus_delete_fs_state; 31177ec681f3Smrg ctx->delete_compute_state = crocus_delete_cs_state; 31187ec681f3Smrg 31197ec681f3Smrg ctx->bind_vs_state = crocus_bind_vs_state; 31207ec681f3Smrg ctx->bind_tcs_state = crocus_bind_tcs_state; 31217ec681f3Smrg ctx->bind_tes_state = crocus_bind_tes_state; 31227ec681f3Smrg ctx->bind_gs_state = crocus_bind_gs_state; 31237ec681f3Smrg ctx->bind_fs_state = crocus_bind_fs_state; 31247ec681f3Smrg ctx->bind_compute_state = crocus_bind_cs_state; 31257ec681f3Smrg} 3126