/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include <inttypes.h> 257ec681f3Smrg#include "util/format/u_format.h" 2601e04c3fSmrg#include "util/u_math.h" 2701e04c3fSmrg#include "util/u_memory.h" 2801e04c3fSmrg#include "util/ralloc.h" 2901e04c3fSmrg#include "util/hash_table.h" 309f464c52Smaya#include "util/u_upload_mgr.h" 3101e04c3fSmrg#include "tgsi/tgsi_dump.h" 3201e04c3fSmrg#include "tgsi/tgsi_parse.h" 3301e04c3fSmrg#include "compiler/nir/nir.h" 3401e04c3fSmrg#include "compiler/nir/nir_builder.h" 3501e04c3fSmrg#include "nir/tgsi_to_nir.h" 3601e04c3fSmrg#include "compiler/v3d_compiler.h" 3701e04c3fSmrg#include "v3d_context.h" 3801e04c3fSmrg#include "broadcom/cle/v3d_packet_v33_pack.h" 399f464c52Smaya 409f464c52Smayastatic struct v3d_compiled_shader * 419f464c52Smayav3d_get_compiled_shader(struct v3d_context *v3d, 429f464c52Smaya struct v3d_key *key, size_t key_size); 439f464c52Smayastatic void 449f464c52Smayav3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 459f464c52Smaya struct v3d_key *key); 4601e04c3fSmrg 4701e04c3fSmrgstatic gl_varying_slot 4801e04c3fSmrgv3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) 4901e04c3fSmrg{ 507ec681f3Smrg nir_foreach_shader_out_variable(var, s) { 5101e04c3fSmrg if (var->data.driver_location == driver_location) { 5201e04c3fSmrg return var->data.location; 5301e04c3fSmrg } 5401e04c3fSmrg } 5501e04c3fSmrg 5601e04c3fSmrg return -1; 5701e04c3fSmrg} 5801e04c3fSmrg 5901e04c3fSmrg/** 6001e04c3fSmrg * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. 6101e04c3fSmrg * 6201e04c3fSmrg * A shader can have 16 of these specs, and each one of them can write up to 6301e04c3fSmrg * 16 dwords. Since we allow a total of 64 transform feedback output 6401e04c3fSmrg * components (not 16 vectors), we have to group the writes of multiple 6501e04c3fSmrg * varyings together in a single data spec. 
6601e04c3fSmrg */ 6701e04c3fSmrgstatic void 6801e04c3fSmrgv3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, 6901e04c3fSmrg const struct pipe_stream_output_info *stream_output) 7001e04c3fSmrg{ 7101e04c3fSmrg if (!stream_output->num_outputs) 7201e04c3fSmrg return; 7301e04c3fSmrg 7401e04c3fSmrg struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; 7501e04c3fSmrg int slot_count = 0; 7601e04c3fSmrg 7701e04c3fSmrg for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { 7801e04c3fSmrg uint32_t buffer_offset = 0; 7901e04c3fSmrg uint32_t vpm_start = slot_count; 8001e04c3fSmrg 8101e04c3fSmrg for (int i = 0; i < stream_output->num_outputs; i++) { 8201e04c3fSmrg const struct pipe_stream_output *output = 8301e04c3fSmrg &stream_output->output[i]; 8401e04c3fSmrg 8501e04c3fSmrg if (output->output_buffer != buffer) 8601e04c3fSmrg continue; 8701e04c3fSmrg 8801e04c3fSmrg /* We assume that the SO outputs appear in increasing 8901e04c3fSmrg * order in the buffer. 9001e04c3fSmrg */ 9101e04c3fSmrg assert(output->dst_offset >= buffer_offset); 9201e04c3fSmrg 9301e04c3fSmrg /* Pad any undefined slots in the output */ 9401e04c3fSmrg for (int j = buffer_offset; j < output->dst_offset; j++) { 9501e04c3fSmrg slots[slot_count] = 9601e04c3fSmrg v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); 9701e04c3fSmrg slot_count++; 9801e04c3fSmrg buffer_offset++; 9901e04c3fSmrg } 10001e04c3fSmrg 10101e04c3fSmrg /* Set the coordinate shader up to output the 10201e04c3fSmrg * components of this varying. 
10301e04c3fSmrg */ 10401e04c3fSmrg for (int j = 0; j < output->num_components; j++) { 10501e04c3fSmrg gl_varying_slot slot = 10601e04c3fSmrg v3d_get_slot_for_driver_location(so->base.ir.nir, output->register_index); 10701e04c3fSmrg 10801e04c3fSmrg slots[slot_count] = 10901e04c3fSmrg v3d_slot_from_slot_and_component(slot, 11001e04c3fSmrg output->start_component + j); 11101e04c3fSmrg slot_count++; 11201e04c3fSmrg buffer_offset++; 11301e04c3fSmrg } 11401e04c3fSmrg } 11501e04c3fSmrg 11601e04c3fSmrg uint32_t vpm_size = slot_count - vpm_start; 11701e04c3fSmrg if (!vpm_size) 11801e04c3fSmrg continue; 11901e04c3fSmrg 12001e04c3fSmrg uint32_t vpm_start_offset = vpm_start + 6; 12101e04c3fSmrg 12201e04c3fSmrg while (vpm_size) { 12301e04c3fSmrg uint32_t write_size = MIN2(vpm_size, 1 << 4); 12401e04c3fSmrg 12501e04c3fSmrg struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { 12601e04c3fSmrg /* We need the offset from the coordinate shader's VPM 12701e04c3fSmrg * output block, which has the [X, Y, Z, W, Xs, Ys] 12801e04c3fSmrg * values at the start. 12901e04c3fSmrg */ 13001e04c3fSmrg .first_shaded_vertex_value_to_output = vpm_start_offset, 13101e04c3fSmrg .number_of_consecutive_vertex_values_to_output_as_32_bit_values = write_size, 13201e04c3fSmrg .output_buffer_to_write_to = buffer, 13301e04c3fSmrg }; 13401e04c3fSmrg 13501e04c3fSmrg /* GFXH-1559 */ 13601e04c3fSmrg assert(unpacked.first_shaded_vertex_value_to_output != 8 || 13701e04c3fSmrg so->num_tf_specs != 0); 13801e04c3fSmrg 13901e04c3fSmrg assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); 14001e04c3fSmrg V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 14101e04c3fSmrg (void *)&so->tf_specs[so->num_tf_specs], 14201e04c3fSmrg &unpacked); 14301e04c3fSmrg 14401e04c3fSmrg /* If point size is being written by the shader, then 14501e04c3fSmrg * all the VPM start offsets are shifted up by one. 14601e04c3fSmrg * We won't know that until the variant is compiled, 14701e04c3fSmrg * though. 
14801e04c3fSmrg */ 14901e04c3fSmrg unpacked.first_shaded_vertex_value_to_output++; 15001e04c3fSmrg 15101e04c3fSmrg /* GFXH-1559 */ 15201e04c3fSmrg assert(unpacked.first_shaded_vertex_value_to_output != 8 || 15301e04c3fSmrg so->num_tf_specs != 0); 15401e04c3fSmrg 15501e04c3fSmrg V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 15601e04c3fSmrg (void *)&so->tf_specs_psiz[so->num_tf_specs], 15701e04c3fSmrg &unpacked); 15801e04c3fSmrg so->num_tf_specs++; 15901e04c3fSmrg vpm_start_offset += write_size; 16001e04c3fSmrg vpm_size -= write_size; 16101e04c3fSmrg } 16201e04c3fSmrg so->base.stream_output.stride[buffer] = 16301e04c3fSmrg stream_output->stride[buffer]; 16401e04c3fSmrg } 16501e04c3fSmrg 16601e04c3fSmrg so->num_tf_outputs = slot_count; 16701e04c3fSmrg so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, 16801e04c3fSmrg slot_count); 16901e04c3fSmrg memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); 17001e04c3fSmrg} 17101e04c3fSmrg 17201e04c3fSmrgstatic int 1739f464c52Smayatype_size(const struct glsl_type *type, bool bindless) 17401e04c3fSmrg{ 17501e04c3fSmrg return glsl_count_attribute_slots(type, false); 17601e04c3fSmrg} 17701e04c3fSmrg 1787ec681f3Smrgstatic void 1797ec681f3Smrgprecompile_all_outputs(nir_shader *s, 1807ec681f3Smrg struct v3d_varying_slot *outputs, 1817ec681f3Smrg uint8_t *num_outputs) 1827ec681f3Smrg{ 1837ec681f3Smrg nir_foreach_shader_out_variable(var, s) { 1847ec681f3Smrg const int array_len = MAX2(glsl_get_length(var->type), 1); 1857ec681f3Smrg for (int j = 0; j < array_len; j++) { 1867ec681f3Smrg const int slot = var->data.location + j; 1877ec681f3Smrg const int num_components = 1887ec681f3Smrg glsl_get_components(var->type); 1897ec681f3Smrg for (int i = 0; i < num_components; i++) { 1907ec681f3Smrg const int swiz = var->data.location_frac + i; 1917ec681f3Smrg outputs[(*num_outputs)++] = 1927ec681f3Smrg v3d_slot_from_slot_and_component(slot, 1937ec681f3Smrg swiz); 1947ec681f3Smrg } 1957ec681f3Smrg } 
1967ec681f3Smrg } 1977ec681f3Smrg} 1987ec681f3Smrg 1999f464c52Smaya/** 2009f464c52Smaya * Precompiles a shader variant at shader state creation time if 2019f464c52Smaya * V3D_DEBUG=precompile is set. Used for shader-db 2029f464c52Smaya * (https://gitlab.freedesktop.org/mesa/shader-db) 2039f464c52Smaya */ 2049f464c52Smayastatic void 2059f464c52Smayav3d_shader_precompile(struct v3d_context *v3d, 2069f464c52Smaya struct v3d_uncompiled_shader *so) 20701e04c3fSmrg{ 2089f464c52Smaya nir_shader *s = so->base.ir.nir; 2099f464c52Smaya 2109f464c52Smaya if (s->info.stage == MESA_SHADER_FRAGMENT) { 2119f464c52Smaya struct v3d_fs_key key = { 2129f464c52Smaya .base.shader_state = so, 2139f464c52Smaya }; 2149f464c52Smaya 2157ec681f3Smrg nir_foreach_shader_out_variable(var, s) { 2169f464c52Smaya if (var->data.location == FRAG_RESULT_COLOR) { 2179f464c52Smaya key.cbufs |= 1 << 0; 2189f464c52Smaya } else if (var->data.location >= FRAG_RESULT_DATA0) { 2199f464c52Smaya key.cbufs |= 1 << (var->data.location - 2209f464c52Smaya FRAG_RESULT_DATA0); 2219f464c52Smaya } 2229f464c52Smaya } 2239f464c52Smaya 2247ec681f3Smrg key.logicop_func = PIPE_LOGICOP_COPY; 2257ec681f3Smrg 2269f464c52Smaya v3d_setup_shared_precompile_key(so, &key.base); 2279f464c52Smaya v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 2287ec681f3Smrg } else if (s->info.stage == MESA_SHADER_GEOMETRY) { 2297ec681f3Smrg struct v3d_gs_key key = { 2309f464c52Smaya .base.shader_state = so, 2317ec681f3Smrg .base.is_last_geometry_stage = true, 2329f464c52Smaya }; 2339f464c52Smaya 2349f464c52Smaya v3d_setup_shared_precompile_key(so, &key.base); 2359f464c52Smaya 2367ec681f3Smrg precompile_all_outputs(s, 2377ec681f3Smrg key.used_outputs, 2387ec681f3Smrg &key.num_used_outputs); 2399f464c52Smaya 2407ec681f3Smrg v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 2417ec681f3Smrg 2427ec681f3Smrg /* Compile GS bin shader: only position (XXX: include TF) */ 2437ec681f3Smrg key.is_coord = true; 2447ec681f3Smrg key.num_used_outputs = 
0; 2457ec681f3Smrg for (int i = 0; i < 4; i++) { 2467ec681f3Smrg key.used_outputs[key.num_used_outputs++] = 2477ec681f3Smrg v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 2487ec681f3Smrg i); 2499f464c52Smaya } 2507ec681f3Smrg v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 2517ec681f3Smrg } else { 2527ec681f3Smrg assert(s->info.stage == MESA_SHADER_VERTEX); 2537ec681f3Smrg struct v3d_vs_key key = { 2547ec681f3Smrg .base.shader_state = so, 2557ec681f3Smrg /* Emit fixed function outputs */ 2567ec681f3Smrg .base.is_last_geometry_stage = true, 2577ec681f3Smrg }; 2587ec681f3Smrg 2597ec681f3Smrg v3d_setup_shared_precompile_key(so, &key.base); 2607ec681f3Smrg 2617ec681f3Smrg precompile_all_outputs(s, 2627ec681f3Smrg key.used_outputs, 2637ec681f3Smrg &key.num_used_outputs); 2649f464c52Smaya 2659f464c52Smaya v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 2669f464c52Smaya 2679f464c52Smaya /* Compile VS bin shader: only position (XXX: include TF) */ 2689f464c52Smaya key.is_coord = true; 2697ec681f3Smrg key.num_used_outputs = 0; 2709f464c52Smaya for (int i = 0; i < 4; i++) { 2717ec681f3Smrg key.used_outputs[key.num_used_outputs++] = 2729f464c52Smaya v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 2739f464c52Smaya i); 2749f464c52Smaya } 2759f464c52Smaya v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 2769f464c52Smaya } 27701e04c3fSmrg} 27801e04c3fSmrg 27901e04c3fSmrgstatic void * 2809f464c52Smayav3d_uncompiled_shader_create(struct pipe_context *pctx, 2819f464c52Smaya enum pipe_shader_ir type, void *ir) 28201e04c3fSmrg{ 28301e04c3fSmrg struct v3d_context *v3d = v3d_context(pctx); 28401e04c3fSmrg struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader); 28501e04c3fSmrg if (!so) 28601e04c3fSmrg return NULL; 28701e04c3fSmrg 28801e04c3fSmrg so->program_id = v3d->next_uncompiled_program_id++; 28901e04c3fSmrg 29001e04c3fSmrg nir_shader *s; 29101e04c3fSmrg 2929f464c52Smaya if (type == PIPE_SHADER_IR_NIR) { 29301e04c3fSmrg /* The backend takes 
ownership of the NIR shader on state 29401e04c3fSmrg * creation. 29501e04c3fSmrg */ 2969f464c52Smaya s = ir; 29701e04c3fSmrg } else { 2989f464c52Smaya assert(type == PIPE_SHADER_IR_TGSI); 29901e04c3fSmrg 3007ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_TGSI)) { 30101e04c3fSmrg fprintf(stderr, "prog %d TGSI:\n", 30201e04c3fSmrg so->program_id); 3039f464c52Smaya tgsi_dump(ir, 0); 30401e04c3fSmrg fprintf(stderr, "\n"); 30501e04c3fSmrg } 3067ec681f3Smrg s = tgsi_to_nir(ir, pctx->screen, false); 30701e04c3fSmrg } 30801e04c3fSmrg 3097ec681f3Smrg if (s->info.stage != MESA_SHADER_VERTEX && 3107ec681f3Smrg s->info.stage != MESA_SHADER_GEOMETRY) { 3117ec681f3Smrg NIR_PASS_V(s, nir_lower_io, 3127ec681f3Smrg nir_var_shader_in | nir_var_shader_out, 3137ec681f3Smrg type_size, (nir_lower_io_options)0); 3147ec681f3Smrg } 31501e04c3fSmrg 31601e04c3fSmrg NIR_PASS_V(s, nir_lower_regs_to_ssa); 31701e04c3fSmrg NIR_PASS_V(s, nir_normalize_cubemap_coords); 31801e04c3fSmrg 31901e04c3fSmrg NIR_PASS_V(s, nir_lower_load_const_to_scalar); 32001e04c3fSmrg 3217ec681f3Smrg v3d_optimize_nir(NULL, s); 32201e04c3fSmrg 3237ec681f3Smrg NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); 32401e04c3fSmrg 32501e04c3fSmrg /* Garbage collect dead instructions */ 32601e04c3fSmrg nir_sweep(s); 32701e04c3fSmrg 32801e04c3fSmrg so->base.type = PIPE_SHADER_IR_NIR; 32901e04c3fSmrg so->base.ir.nir = s; 33001e04c3fSmrg 3317ec681f3Smrg if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 3327ec681f3Smrg v3d_debug_flag_for_shader_stage(s->info.stage)))) { 33301e04c3fSmrg fprintf(stderr, "%s prog %d NIR:\n", 33401e04c3fSmrg gl_shader_stage_name(s->info.stage), 33501e04c3fSmrg so->program_id); 33601e04c3fSmrg nir_print_shader(s, stderr); 33701e04c3fSmrg fprintf(stderr, "\n"); 33801e04c3fSmrg } 33901e04c3fSmrg 3407ec681f3Smrg if (unlikely(V3D_DEBUG & V3D_DEBUG_PRECOMPILE)) 3419f464c52Smaya v3d_shader_precompile(v3d, so); 3429f464c52Smaya 34301e04c3fSmrg return so; 34401e04c3fSmrg} 34501e04c3fSmrg 
3469f464c52Smayastatic void 3479f464c52Smayav3d_shader_debug_output(const char *message, void *data) 3489f464c52Smaya{ 3499f464c52Smaya struct v3d_context *v3d = data; 3509f464c52Smaya 3519f464c52Smaya pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); 3529f464c52Smaya} 3539f464c52Smaya 3549f464c52Smayastatic void * 3559f464c52Smayav3d_shader_state_create(struct pipe_context *pctx, 3569f464c52Smaya const struct pipe_shader_state *cso) 3579f464c52Smaya{ 3589f464c52Smaya struct v3d_uncompiled_shader *so = 3599f464c52Smaya v3d_uncompiled_shader_create(pctx, 3609f464c52Smaya cso->type, 3619f464c52Smaya (cso->type == PIPE_SHADER_IR_TGSI ? 3629f464c52Smaya (void *)cso->tokens : 3639f464c52Smaya cso->ir.nir)); 3649f464c52Smaya 3659f464c52Smaya v3d_set_transform_feedback_outputs(so, &cso->stream_output); 3669f464c52Smaya 3679f464c52Smaya return so; 3689f464c52Smaya} 3699f464c52Smaya 3709f464c52Smayastruct v3d_compiled_shader * 3719f464c52Smayav3d_get_compiled_shader(struct v3d_context *v3d, 3729f464c52Smaya struct v3d_key *key, 3739f464c52Smaya size_t key_size) 37401e04c3fSmrg{ 37501e04c3fSmrg struct v3d_uncompiled_shader *shader_state = key->shader_state; 37601e04c3fSmrg nir_shader *s = shader_state->base.ir.nir; 37701e04c3fSmrg 3789f464c52Smaya struct hash_table *ht = v3d->prog.cache[s->info.stage]; 37901e04c3fSmrg struct hash_entry *entry = _mesa_hash_table_search(ht, key); 38001e04c3fSmrg if (entry) 38101e04c3fSmrg return entry->data; 38201e04c3fSmrg 38301e04c3fSmrg struct v3d_compiled_shader *shader = 38401e04c3fSmrg rzalloc(NULL, struct v3d_compiled_shader); 38501e04c3fSmrg 38601e04c3fSmrg int program_id = shader_state->program_id; 38701e04c3fSmrg int variant_id = 38801e04c3fSmrg p_atomic_inc_return(&shader_state->compiled_variant_count); 38901e04c3fSmrg uint64_t *qpu_insts; 39001e04c3fSmrg uint32_t shader_size; 39101e04c3fSmrg 3929f464c52Smaya qpu_insts = v3d_compile(v3d->screen->compiler, key, 3939f464c52Smaya &shader->prog_data.base, s, 3949f464c52Smaya 
v3d_shader_debug_output, 3959f464c52Smaya v3d, 3969f464c52Smaya program_id, variant_id, &shader_size); 3979f464c52Smaya ralloc_steal(shader, shader->prog_data.base); 39801e04c3fSmrg 39901e04c3fSmrg v3d_set_shader_uniform_dirty_flags(shader); 40001e04c3fSmrg 4019f464c52Smaya if (shader_size) { 4029f464c52Smaya u_upload_data(v3d->state_uploader, 0, shader_size, 8, 4039f464c52Smaya qpu_insts, &shader->offset, &shader->resource); 4049f464c52Smaya } 40501e04c3fSmrg 40601e04c3fSmrg free(qpu_insts); 40701e04c3fSmrg 4089f464c52Smaya if (ht) { 4099f464c52Smaya struct v3d_key *dup_key; 4109f464c52Smaya dup_key = ralloc_size(shader, key_size); 4119f464c52Smaya memcpy(dup_key, key, key_size); 4129f464c52Smaya _mesa_hash_table_insert(ht, dup_key, shader); 4139f464c52Smaya } 41401e04c3fSmrg 41501e04c3fSmrg if (shader->prog_data.base->spill_size > 41601e04c3fSmrg v3d->prog.spill_size_per_thread) { 4179f464c52Smaya /* The TIDX register we use for choosing the area to access 4189f464c52Smaya * for scratch space is: (core << 6) | (qpu << 2) | thread. 4199f464c52Smaya * Even at minimum threadcount in a particular shader, that 4209f464c52Smaya * means we still multiply by qpus by 4. 
42101e04c3fSmrg */ 4229f464c52Smaya int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * 4239f464c52Smaya shader->prog_data.base->spill_size); 42401e04c3fSmrg 42501e04c3fSmrg v3d_bo_unreference(&v3d->prog.spill_bo); 42601e04c3fSmrg v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, 42701e04c3fSmrg total_spill_size, "spill"); 42801e04c3fSmrg v3d->prog.spill_size_per_thread = 42901e04c3fSmrg shader->prog_data.base->spill_size; 43001e04c3fSmrg } 43101e04c3fSmrg 43201e04c3fSmrg return shader; 43301e04c3fSmrg} 43401e04c3fSmrg 4359f464c52Smayastatic void 4369f464c52Smayav3d_free_compiled_shader(struct v3d_compiled_shader *shader) 4379f464c52Smaya{ 4389f464c52Smaya pipe_resource_reference(&shader->resource, NULL); 4399f464c52Smaya ralloc_free(shader); 4409f464c52Smaya} 4419f464c52Smaya 44201e04c3fSmrgstatic void 44301e04c3fSmrgv3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, 44401e04c3fSmrg struct v3d_texture_stateobj *texstate) 44501e04c3fSmrg{ 44601e04c3fSmrg const struct v3d_device_info *devinfo = &v3d->screen->devinfo; 44701e04c3fSmrg 4487ec681f3Smrg key->num_tex_used = texstate->num_textures; 4497ec681f3Smrg key->num_samplers_used = texstate->num_textures; 4507ec681f3Smrg assert(key->num_tex_used == key->num_samplers_used); 45101e04c3fSmrg for (int i = 0; i < texstate->num_textures; i++) { 45201e04c3fSmrg struct pipe_sampler_view *sampler = texstate->textures[i]; 45301e04c3fSmrg struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler); 45401e04c3fSmrg struct pipe_sampler_state *sampler_state = 45501e04c3fSmrg texstate->samplers[i]; 45601e04c3fSmrg 45701e04c3fSmrg if (!sampler) 45801e04c3fSmrg continue; 45901e04c3fSmrg 4607ec681f3Smrg key->sampler[i].return_size = 46101e04c3fSmrg v3d_get_tex_return_size(devinfo, 46201e04c3fSmrg sampler->format, 46301e04c3fSmrg sampler_state->compare_mode); 46401e04c3fSmrg 46501e04c3fSmrg /* For 16-bit, we set up the sampler to always return 2 46601e04c3fSmrg * channels (meaning no recompiles for most 
statechanges), 46701e04c3fSmrg * while for 32 we actually scale the returns with channels. 46801e04c3fSmrg */ 4697ec681f3Smrg if (key->sampler[i].return_size == 16) { 4707ec681f3Smrg key->sampler[i].return_channels = 2; 47101e04c3fSmrg } else if (devinfo->ver > 40) { 4727ec681f3Smrg key->sampler[i].return_channels = 4; 47301e04c3fSmrg } else { 4747ec681f3Smrg key->sampler[i].return_channels = 47501e04c3fSmrg v3d_get_tex_return_channels(devinfo, 47601e04c3fSmrg sampler->format); 47701e04c3fSmrg } 47801e04c3fSmrg 4797ec681f3Smrg if (key->sampler[i].return_size == 32 && devinfo->ver < 40) { 48001e04c3fSmrg memcpy(key->tex[i].swizzle, 48101e04c3fSmrg v3d_sampler->swizzle, 48201e04c3fSmrg sizeof(v3d_sampler->swizzle)); 48301e04c3fSmrg } else { 48401e04c3fSmrg /* For 16-bit returns, we let the sampler state handle 48501e04c3fSmrg * the swizzle. 48601e04c3fSmrg */ 48701e04c3fSmrg key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 48801e04c3fSmrg key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 48901e04c3fSmrg key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 49001e04c3fSmrg key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 49101e04c3fSmrg } 49201e04c3fSmrg } 4939f464c52Smaya} 49401e04c3fSmrg 4959f464c52Smayastatic void 4969f464c52Smayav3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 4979f464c52Smaya struct v3d_key *key) 4989f464c52Smaya{ 4999f464c52Smaya nir_shader *s = uncompiled->base.ir.nir; 5009f464c52Smaya 5017ec681f3Smrg /* Note that below we access they key's texture and sampler fields 5027ec681f3Smrg * using the same index. 
On OpenGL they are the same (they are 5037ec681f3Smrg * combined) 5047ec681f3Smrg */ 5057ec681f3Smrg key->num_tex_used = s->info.num_textures; 5067ec681f3Smrg key->num_samplers_used = s->info.num_textures; 5079f464c52Smaya for (int i = 0; i < s->info.num_textures; i++) { 5087ec681f3Smrg key->sampler[i].return_size = 16; 5097ec681f3Smrg key->sampler[i].return_channels = 2; 5109f464c52Smaya 5119f464c52Smaya key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 5129f464c52Smaya key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 5139f464c52Smaya key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 5149f464c52Smaya key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 5159f464c52Smaya } 51601e04c3fSmrg} 51701e04c3fSmrg 51801e04c3fSmrgstatic void 51901e04c3fSmrgv3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) 52001e04c3fSmrg{ 52101e04c3fSmrg struct v3d_job *job = v3d->job; 52201e04c3fSmrg struct v3d_fs_key local_key; 52301e04c3fSmrg struct v3d_fs_key *key = &local_key; 5249f464c52Smaya nir_shader *s = v3d->prog.bind_fs->base.ir.nir; 52501e04c3fSmrg 5267ec681f3Smrg if (!(v3d->dirty & (V3D_DIRTY_PRIM_MODE | 5277ec681f3Smrg V3D_DIRTY_BLEND | 5287ec681f3Smrg V3D_DIRTY_FRAMEBUFFER | 5297ec681f3Smrg V3D_DIRTY_ZSA | 5307ec681f3Smrg V3D_DIRTY_RASTERIZER | 5317ec681f3Smrg V3D_DIRTY_SAMPLE_STATE | 5327ec681f3Smrg V3D_DIRTY_FRAGTEX | 5337ec681f3Smrg V3D_DIRTY_UNCOMPILED_FS))) { 53401e04c3fSmrg return; 53501e04c3fSmrg } 53601e04c3fSmrg 53701e04c3fSmrg memset(key, 0, sizeof(*key)); 5389f464c52Smaya v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); 53901e04c3fSmrg key->base.shader_state = v3d->prog.bind_fs; 5409f464c52Smaya key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 54101e04c3fSmrg key->is_points = (prim_mode == PIPE_PRIM_POINTS); 54201e04c3fSmrg key->is_lines = (prim_mode >= PIPE_PRIM_LINES && 54301e04c3fSmrg prim_mode <= PIPE_PRIM_LINE_STRIP); 5447ec681f3Smrg key->line_smoothing = (key->is_lines && 5457ec681f3Smrg v3d_line_smoothing_enabled(v3d)); 5467ec681f3Smrg 
key->has_gs = v3d->prog.bind_gs != NULL; 54701e04c3fSmrg if (v3d->blend->base.logicop_enable) { 54801e04c3fSmrg key->logicop_func = v3d->blend->base.logicop_func; 54901e04c3fSmrg } else { 55001e04c3fSmrg key->logicop_func = PIPE_LOGICOP_COPY; 55101e04c3fSmrg } 55201e04c3fSmrg if (job->msaa) { 55301e04c3fSmrg key->msaa = v3d->rasterizer->base.multisample; 55401e04c3fSmrg key->sample_coverage = (v3d->rasterizer->base.multisample && 5559f464c52Smaya v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); 55601e04c3fSmrg key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; 55701e04c3fSmrg key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; 55801e04c3fSmrg } 55901e04c3fSmrg 56001e04c3fSmrg key->swap_color_rb = v3d->swap_color_rb; 56101e04c3fSmrg 5629f464c52Smaya for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) { 56301e04c3fSmrg struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; 56401e04c3fSmrg if (!cbuf) 56501e04c3fSmrg continue; 56601e04c3fSmrg 5679f464c52Smaya /* gl_FragColor's propagation to however many bound color 5689f464c52Smaya * buffers there are means that the shader compile needs to 5699f464c52Smaya * know what buffers are present. 5709f464c52Smaya */ 5719f464c52Smaya key->cbufs |= 1 << i; 5729f464c52Smaya 5737ec681f3Smrg /* If logic operations are enabled then we might emit color 5747ec681f3Smrg * reads and we need to know the color buffer format and 5757ec681f3Smrg * swizzle for that. 
5767ec681f3Smrg */ 5777ec681f3Smrg if (key->logicop_func != PIPE_LOGICOP_COPY) { 5787ec681f3Smrg key->color_fmt[i].format = cbuf->format; 5797ec681f3Smrg key->color_fmt[i].swizzle = 5807ec681f3Smrg v3d_get_format_swizzle(&v3d->screen->devinfo, 5817ec681f3Smrg cbuf->format); 5827ec681f3Smrg } 5837ec681f3Smrg 58401e04c3fSmrg const struct util_format_description *desc = 58501e04c3fSmrg util_format_description(cbuf->format); 58601e04c3fSmrg 58701e04c3fSmrg if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 58801e04c3fSmrg desc->channel[0].size == 32) { 58901e04c3fSmrg key->f32_color_rb |= 1 << i; 59001e04c3fSmrg } 59101e04c3fSmrg 5929f464c52Smaya if (s->info.fs.untyped_color_outputs) { 59301e04c3fSmrg if (util_format_is_pure_uint(cbuf->format)) 59401e04c3fSmrg key->uint_color_rb |= 1 << i; 59501e04c3fSmrg else if (util_format_is_pure_sint(cbuf->format)) 59601e04c3fSmrg key->int_color_rb |= 1 << i; 59701e04c3fSmrg } 59801e04c3fSmrg } 59901e04c3fSmrg 60001e04c3fSmrg if (key->is_points) { 60101e04c3fSmrg key->point_sprite_mask = 60201e04c3fSmrg v3d->rasterizer->base.sprite_coord_enable; 6037ec681f3Smrg /* this is handled by lower_wpos_pntc */ 6047ec681f3Smrg key->point_coord_upper_left = false; 60501e04c3fSmrg } 60601e04c3fSmrg 60701e04c3fSmrg struct v3d_compiled_shader *old_fs = v3d->prog.fs; 6089f464c52Smaya v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 60901e04c3fSmrg if (v3d->prog.fs == old_fs) 61001e04c3fSmrg return; 61101e04c3fSmrg 6127ec681f3Smrg v3d->dirty |= V3D_DIRTY_COMPILED_FS; 61301e04c3fSmrg 61401e04c3fSmrg if (old_fs) { 61501e04c3fSmrg if (v3d->prog.fs->prog_data.fs->flat_shade_flags != 61601e04c3fSmrg old_fs->prog_data.fs->flat_shade_flags) { 6177ec681f3Smrg v3d->dirty |= V3D_DIRTY_FLAT_SHADE_FLAGS; 61801e04c3fSmrg } 61901e04c3fSmrg 62001e04c3fSmrg if (v3d->prog.fs->prog_data.fs->noperspective_flags != 62101e04c3fSmrg old_fs->prog_data.fs->noperspective_flags) { 6227ec681f3Smrg v3d->dirty |= V3D_DIRTY_NOPERSPECTIVE_FLAGS; 
62301e04c3fSmrg } 62401e04c3fSmrg 62501e04c3fSmrg if (v3d->prog.fs->prog_data.fs->centroid_flags != 62601e04c3fSmrg old_fs->prog_data.fs->centroid_flags) { 6277ec681f3Smrg v3d->dirty |= V3D_DIRTY_CENTROID_FLAGS; 62801e04c3fSmrg } 62901e04c3fSmrg } 63001e04c3fSmrg 63101e04c3fSmrg if (old_fs && memcmp(v3d->prog.fs->prog_data.fs->input_slots, 63201e04c3fSmrg old_fs->prog_data.fs->input_slots, 63301e04c3fSmrg sizeof(v3d->prog.fs->prog_data.fs->input_slots))) { 6347ec681f3Smrg v3d->dirty |= V3D_DIRTY_FS_INPUTS; 6357ec681f3Smrg } 6367ec681f3Smrg} 6377ec681f3Smrg 6387ec681f3Smrgstatic void 6397ec681f3Smrgv3d_update_compiled_gs(struct v3d_context *v3d, uint8_t prim_mode) 6407ec681f3Smrg{ 6417ec681f3Smrg struct v3d_gs_key local_key; 6427ec681f3Smrg struct v3d_gs_key *key = &local_key; 6437ec681f3Smrg 6447ec681f3Smrg if (!(v3d->dirty & (V3D_DIRTY_GEOMTEX | 6457ec681f3Smrg V3D_DIRTY_RASTERIZER | 6467ec681f3Smrg V3D_DIRTY_UNCOMPILED_GS | 6477ec681f3Smrg V3D_DIRTY_PRIM_MODE | 6487ec681f3Smrg V3D_DIRTY_FS_INPUTS))) { 6497ec681f3Smrg return; 6507ec681f3Smrg } 6517ec681f3Smrg 6527ec681f3Smrg if (!v3d->prog.bind_gs) { 6537ec681f3Smrg v3d->prog.gs = NULL; 6547ec681f3Smrg v3d->prog.gs_bin = NULL; 6557ec681f3Smrg return; 6567ec681f3Smrg } 6577ec681f3Smrg 6587ec681f3Smrg memset(key, 0, sizeof(*key)); 6597ec681f3Smrg v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_GEOMETRY]); 6607ec681f3Smrg key->base.shader_state = v3d->prog.bind_gs; 6617ec681f3Smrg key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 6627ec681f3Smrg key->base.is_last_geometry_stage = true; 6637ec681f3Smrg key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; 6647ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 6657ec681f3Smrg sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 6667ec681f3Smrg memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, 6677ec681f3Smrg sizeof(key->used_outputs)); 6687ec681f3Smrg 6697ec681f3Smrg key->per_vertex_point_size = 6707ec681f3Smrg 
(prim_mode == PIPE_PRIM_POINTS && 6717ec681f3Smrg v3d->rasterizer->base.point_size_per_vertex); 6727ec681f3Smrg 6737ec681f3Smrg struct v3d_compiled_shader *gs = 6747ec681f3Smrg v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 6757ec681f3Smrg if (gs != v3d->prog.gs) { 6767ec681f3Smrg v3d->prog.gs = gs; 6777ec681f3Smrg v3d->dirty |= V3D_DIRTY_COMPILED_GS; 6787ec681f3Smrg } 6797ec681f3Smrg 6807ec681f3Smrg key->is_coord = true; 6817ec681f3Smrg 6827ec681f3Smrg /* The last bin-mode shader in the geometry pipeline only outputs 6837ec681f3Smrg * varyings used by transform feedback. 6847ec681f3Smrg */ 6857ec681f3Smrg struct v3d_uncompiled_shader *shader_state = key->base.shader_state; 6867ec681f3Smrg memcpy(key->used_outputs, shader_state->tf_outputs, 6877ec681f3Smrg sizeof(*key->used_outputs) * shader_state->num_tf_outputs); 6887ec681f3Smrg if (shader_state->num_tf_outputs < key->num_used_outputs) { 6897ec681f3Smrg uint32_t size = sizeof(*key->used_outputs) * 6907ec681f3Smrg (key->num_used_outputs - 6917ec681f3Smrg shader_state->num_tf_outputs); 6927ec681f3Smrg memset(&key->used_outputs[shader_state->num_tf_outputs], 6937ec681f3Smrg 0, size); 6947ec681f3Smrg } 6957ec681f3Smrg key->num_used_outputs = shader_state->num_tf_outputs; 6967ec681f3Smrg 6977ec681f3Smrg struct v3d_compiled_shader *old_gs = v3d->prog.gs; 6987ec681f3Smrg struct v3d_compiled_shader *gs_bin = 6997ec681f3Smrg v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 7007ec681f3Smrg if (gs_bin != old_gs) { 7017ec681f3Smrg v3d->prog.gs_bin = gs_bin; 7027ec681f3Smrg v3d->dirty |= V3D_DIRTY_COMPILED_GS_BIN; 7037ec681f3Smrg } 7047ec681f3Smrg 7057ec681f3Smrg if (old_gs && memcmp(v3d->prog.gs->prog_data.gs->input_slots, 7067ec681f3Smrg old_gs->prog_data.gs->input_slots, 7077ec681f3Smrg sizeof(v3d->prog.gs->prog_data.gs->input_slots))) { 7087ec681f3Smrg v3d->dirty |= V3D_DIRTY_GS_INPUTS; 70901e04c3fSmrg } 71001e04c3fSmrg} 71101e04c3fSmrg 71201e04c3fSmrgstatic void 
71301e04c3fSmrgv3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) 71401e04c3fSmrg{ 71501e04c3fSmrg struct v3d_vs_key local_key; 71601e04c3fSmrg struct v3d_vs_key *key = &local_key; 71701e04c3fSmrg 7187ec681f3Smrg if (!(v3d->dirty & (V3D_DIRTY_VERTTEX | 7197ec681f3Smrg V3D_DIRTY_VTXSTATE | 7207ec681f3Smrg V3D_DIRTY_UNCOMPILED_VS | 7217ec681f3Smrg (v3d->prog.bind_gs ? 0 : V3D_DIRTY_RASTERIZER) | 7227ec681f3Smrg (v3d->prog.bind_gs ? 0 : V3D_DIRTY_PRIM_MODE) | 7237ec681f3Smrg (v3d->prog.bind_gs ? V3D_DIRTY_GS_INPUTS : 7247ec681f3Smrg V3D_DIRTY_FS_INPUTS)))) { 72501e04c3fSmrg return; 72601e04c3fSmrg } 72701e04c3fSmrg 72801e04c3fSmrg memset(key, 0, sizeof(*key)); 7299f464c52Smaya v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); 73001e04c3fSmrg key->base.shader_state = v3d->prog.bind_vs; 7319f464c52Smaya key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 7327ec681f3Smrg key->base.is_last_geometry_stage = !v3d->prog.bind_gs; 7337ec681f3Smrg 7347ec681f3Smrg if (!v3d->prog.bind_gs) { 7357ec681f3Smrg key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; 7367ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 7377ec681f3Smrg sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 7387ec681f3Smrg memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, 7397ec681f3Smrg sizeof(key->used_outputs)); 7407ec681f3Smrg } else { 7417ec681f3Smrg key->num_used_outputs = v3d->prog.gs->prog_data.gs->num_inputs; 7427ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 7437ec681f3Smrg sizeof(v3d->prog.gs->prog_data.gs->input_slots)); 7447ec681f3Smrg memcpy(key->used_outputs, v3d->prog.gs->prog_data.gs->input_slots, 7457ec681f3Smrg sizeof(key->used_outputs)); 7467ec681f3Smrg } 74701e04c3fSmrg 74801e04c3fSmrg key->per_vertex_point_size = 74901e04c3fSmrg (prim_mode == PIPE_PRIM_POINTS && 75001e04c3fSmrg v3d->rasterizer->base.point_size_per_vertex); 75101e04c3fSmrg 7527ec681f3Smrg nir_shader *s = v3d->prog.bind_vs->base.ir.nir; 
7537ec681f3Smrg uint64_t inputs_read = s->info.inputs_read; 7547ec681f3Smrg assert(util_bitcount(inputs_read) <= v3d->vtx->num_elements); 7557ec681f3Smrg 7567ec681f3Smrg while (inputs_read) { 7577ec681f3Smrg int location = u_bit_scan64(&inputs_read); 7587ec681f3Smrg nir_variable *var = 7597ec681f3Smrg nir_find_variable_with_location(s, nir_var_shader_in, location); 7607ec681f3Smrg assert (var != NULL); 7617ec681f3Smrg int driver_location = var->data.driver_location; 7627ec681f3Smrg switch (v3d->vtx->pipe[driver_location].src_format) { 7637ec681f3Smrg case PIPE_FORMAT_B8G8R8A8_UNORM: 7647ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_UNORM: 7657ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_SNORM: 7667ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_USCALED: 7677ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_SSCALED: 7687ec681f3Smrg key->va_swap_rb_mask |= 1 << location; 7697ec681f3Smrg break; 7707ec681f3Smrg default: 7717ec681f3Smrg break; 7727ec681f3Smrg } 7737ec681f3Smrg } 7747ec681f3Smrg 77501e04c3fSmrg struct v3d_compiled_shader *vs = 7769f464c52Smaya v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 77701e04c3fSmrg if (vs != v3d->prog.vs) { 77801e04c3fSmrg v3d->prog.vs = vs; 7797ec681f3Smrg v3d->dirty |= V3D_DIRTY_COMPILED_VS; 78001e04c3fSmrg } 78101e04c3fSmrg 78201e04c3fSmrg key->is_coord = true; 7837ec681f3Smrg 7847ec681f3Smrg /* Coord shaders only output varyings used by transform feedback, 7857ec681f3Smrg * unless they are linked to other shaders in the geometry side 7867ec681f3Smrg * of the pipeline, since in that case any of the output varyings 7877ec681f3Smrg * could be required in later geometry stages to compute 7887ec681f3Smrg * gl_Position or TF outputs. 
7897ec681f3Smrg */ 7907ec681f3Smrg if (!v3d->prog.bind_gs) { 7917ec681f3Smrg struct v3d_uncompiled_shader *shader_state = 7927ec681f3Smrg key->base.shader_state; 7937ec681f3Smrg memcpy(key->used_outputs, shader_state->tf_outputs, 7947ec681f3Smrg sizeof(*key->used_outputs) * 7957ec681f3Smrg shader_state->num_tf_outputs); 7967ec681f3Smrg if (shader_state->num_tf_outputs < key->num_used_outputs) { 7977ec681f3Smrg uint32_t tail_bytes = 7987ec681f3Smrg sizeof(*key->used_outputs) * 7997ec681f3Smrg (key->num_used_outputs - 8007ec681f3Smrg shader_state->num_tf_outputs); 8017ec681f3Smrg memset(&key->used_outputs[shader_state->num_tf_outputs], 8027ec681f3Smrg 0, tail_bytes); 8037ec681f3Smrg } 8047ec681f3Smrg key->num_used_outputs = shader_state->num_tf_outputs; 8057ec681f3Smrg } else { 8067ec681f3Smrg key->num_used_outputs = v3d->prog.gs_bin->prog_data.gs->num_inputs; 8077ec681f3Smrg STATIC_ASSERT(sizeof(key->used_outputs) == 8087ec681f3Smrg sizeof(v3d->prog.gs_bin->prog_data.gs->input_slots)); 8097ec681f3Smrg memcpy(key->used_outputs, v3d->prog.gs_bin->prog_data.gs->input_slots, 8107ec681f3Smrg sizeof(key->used_outputs)); 81101e04c3fSmrg } 81201e04c3fSmrg 81301e04c3fSmrg struct v3d_compiled_shader *cs = 8149f464c52Smaya v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 81501e04c3fSmrg if (cs != v3d->prog.cs) { 81601e04c3fSmrg v3d->prog.cs = cs; 8177ec681f3Smrg v3d->dirty |= V3D_DIRTY_COMPILED_CS; 81801e04c3fSmrg } 81901e04c3fSmrg} 82001e04c3fSmrg 82101e04c3fSmrgvoid 82201e04c3fSmrgv3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode) 82301e04c3fSmrg{ 82401e04c3fSmrg v3d_update_compiled_fs(v3d, prim_mode); 8257ec681f3Smrg v3d_update_compiled_gs(v3d, prim_mode); 82601e04c3fSmrg v3d_update_compiled_vs(v3d, prim_mode); 82701e04c3fSmrg} 82801e04c3fSmrg 8299f464c52Smayavoid 8309f464c52Smayav3d_update_compiled_cs(struct v3d_context *v3d) 8319f464c52Smaya{ 8329f464c52Smaya struct v3d_key local_key; 8339f464c52Smaya struct v3d_key *key = &local_key; 
8349f464c52Smaya 8357ec681f3Smrg if (!(v3d->dirty & (V3D_DIRTY_UNCOMPILED_CS | 8367ec681f3Smrg V3D_DIRTY_COMPTEX))) { 8379f464c52Smaya return; 8389f464c52Smaya } 8399f464c52Smaya 8409f464c52Smaya memset(key, 0, sizeof(*key)); 8419f464c52Smaya v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]); 8429f464c52Smaya key->shader_state = v3d->prog.bind_compute; 8439f464c52Smaya 8449f464c52Smaya struct v3d_compiled_shader *cs = 8459f464c52Smaya v3d_get_compiled_shader(v3d, key, sizeof(*key)); 8469f464c52Smaya if (cs != v3d->prog.compute) { 8479f464c52Smaya v3d->prog.compute = cs; 8487ec681f3Smrg v3d->dirty |= V3D_DIRTY_COMPILED_CS; /* XXX */ 8499f464c52Smaya } 8509f464c52Smaya} 8519f464c52Smaya 85201e04c3fSmrgstatic uint32_t 85301e04c3fSmrgfs_cache_hash(const void *key) 85401e04c3fSmrg{ 85501e04c3fSmrg return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); 85601e04c3fSmrg} 85701e04c3fSmrg 8587ec681f3Smrgstatic uint32_t 8597ec681f3Smrggs_cache_hash(const void *key) 8607ec681f3Smrg{ 8617ec681f3Smrg return _mesa_hash_data(key, sizeof(struct v3d_gs_key)); 8627ec681f3Smrg} 8637ec681f3Smrg 86401e04c3fSmrgstatic uint32_t 86501e04c3fSmrgvs_cache_hash(const void *key) 86601e04c3fSmrg{ 86701e04c3fSmrg return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); 86801e04c3fSmrg} 86901e04c3fSmrg 8709f464c52Smayastatic uint32_t 8719f464c52Smayacs_cache_hash(const void *key) 8729f464c52Smaya{ 8739f464c52Smaya return _mesa_hash_data(key, sizeof(struct v3d_key)); 8749f464c52Smaya} 8759f464c52Smaya 87601e04c3fSmrgstatic bool 87701e04c3fSmrgfs_cache_compare(const void *key1, const void *key2) 87801e04c3fSmrg{ 87901e04c3fSmrg return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; 88001e04c3fSmrg} 88101e04c3fSmrg 8827ec681f3Smrgstatic bool 8837ec681f3Smrggs_cache_compare(const void *key1, const void *key2) 8847ec681f3Smrg{ 8857ec681f3Smrg return memcmp(key1, key2, sizeof(struct v3d_gs_key)) == 0; 8867ec681f3Smrg} 8877ec681f3Smrg 88801e04c3fSmrgstatic bool 
88901e04c3fSmrgvs_cache_compare(const void *key1, const void *key2) 89001e04c3fSmrg{ 89101e04c3fSmrg return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; 89201e04c3fSmrg} 89301e04c3fSmrg 8949f464c52Smayastatic bool 8959f464c52Smayacs_cache_compare(const void *key1, const void *key2) 89601e04c3fSmrg{ 8979f464c52Smaya return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; 89801e04c3fSmrg} 89901e04c3fSmrg 90001e04c3fSmrgstatic void 90101e04c3fSmrgv3d_shader_state_delete(struct pipe_context *pctx, void *hwcso) 90201e04c3fSmrg{ 90301e04c3fSmrg struct v3d_context *v3d = v3d_context(pctx); 90401e04c3fSmrg struct v3d_uncompiled_shader *so = hwcso; 9059f464c52Smaya nir_shader *s = so->base.ir.nir; 90601e04c3fSmrg 9079f464c52Smaya hash_table_foreach(v3d->prog.cache[s->info.stage], entry) { 9089f464c52Smaya const struct v3d_key *key = entry->key; 9099f464c52Smaya struct v3d_compiled_shader *shader = entry->data; 9109f464c52Smaya 9119f464c52Smaya if (key->shader_state != so) 9129f464c52Smaya continue; 9139f464c52Smaya 9149f464c52Smaya if (v3d->prog.fs == shader) 9159f464c52Smaya v3d->prog.fs = NULL; 9169f464c52Smaya if (v3d->prog.vs == shader) 9179f464c52Smaya v3d->prog.vs = NULL; 9189f464c52Smaya if (v3d->prog.cs == shader) 9199f464c52Smaya v3d->prog.cs = NULL; 9209f464c52Smaya if (v3d->prog.compute == shader) 9219f464c52Smaya v3d->prog.compute = NULL; 9229f464c52Smaya 9239f464c52Smaya _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry); 9249f464c52Smaya v3d_free_compiled_shader(shader); 92501e04c3fSmrg } 92601e04c3fSmrg 92701e04c3fSmrg ralloc_free(so->base.ir.nir); 92801e04c3fSmrg free(so); 92901e04c3fSmrg} 93001e04c3fSmrg 93101e04c3fSmrgstatic void 93201e04c3fSmrgv3d_fp_state_bind(struct pipe_context *pctx, void *hwcso) 93301e04c3fSmrg{ 93401e04c3fSmrg struct v3d_context *v3d = v3d_context(pctx); 93501e04c3fSmrg v3d->prog.bind_fs = hwcso; 9367ec681f3Smrg v3d->dirty |= V3D_DIRTY_UNCOMPILED_FS; 9377ec681f3Smrg} 9387ec681f3Smrg 9397ec681f3Smrgstatic void 
9407ec681f3Smrgv3d_gp_state_bind(struct pipe_context *pctx, void *hwcso) 9417ec681f3Smrg{ 9427ec681f3Smrg struct v3d_context *v3d = v3d_context(pctx); 9437ec681f3Smrg v3d->prog.bind_gs = hwcso; 9447ec681f3Smrg v3d->dirty |= V3D_DIRTY_UNCOMPILED_GS; 94501e04c3fSmrg} 94601e04c3fSmrg 94701e04c3fSmrgstatic void 94801e04c3fSmrgv3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) 94901e04c3fSmrg{ 95001e04c3fSmrg struct v3d_context *v3d = v3d_context(pctx); 95101e04c3fSmrg v3d->prog.bind_vs = hwcso; 9527ec681f3Smrg v3d->dirty |= V3D_DIRTY_UNCOMPILED_VS; 95301e04c3fSmrg} 95401e04c3fSmrg 9559f464c52Smayastatic void 9569f464c52Smayav3d_compute_state_bind(struct pipe_context *pctx, void *state) 9579f464c52Smaya{ 9589f464c52Smaya struct v3d_context *v3d = v3d_context(pctx); 9599f464c52Smaya 9609f464c52Smaya v3d->prog.bind_compute = state; 9617ec681f3Smrg v3d->dirty |= V3D_DIRTY_UNCOMPILED_CS; 9629f464c52Smaya} 9639f464c52Smaya 9649f464c52Smayastatic void * 9659f464c52Smayav3d_create_compute_state(struct pipe_context *pctx, 9669f464c52Smaya const struct pipe_compute_state *cso) 9679f464c52Smaya{ 9689f464c52Smaya return v3d_uncompiled_shader_create(pctx, cso->ir_type, 9699f464c52Smaya (void *)cso->prog); 9709f464c52Smaya} 9719f464c52Smaya 97201e04c3fSmrgvoid 97301e04c3fSmrgv3d_program_init(struct pipe_context *pctx) 97401e04c3fSmrg{ 97501e04c3fSmrg struct v3d_context *v3d = v3d_context(pctx); 97601e04c3fSmrg 97701e04c3fSmrg pctx->create_vs_state = v3d_shader_state_create; 97801e04c3fSmrg pctx->delete_vs_state = v3d_shader_state_delete; 97901e04c3fSmrg 9807ec681f3Smrg pctx->create_gs_state = v3d_shader_state_create; 9817ec681f3Smrg pctx->delete_gs_state = v3d_shader_state_delete; 9827ec681f3Smrg 98301e04c3fSmrg pctx->create_fs_state = v3d_shader_state_create; 98401e04c3fSmrg pctx->delete_fs_state = v3d_shader_state_delete; 98501e04c3fSmrg 98601e04c3fSmrg pctx->bind_fs_state = v3d_fp_state_bind; 9877ec681f3Smrg pctx->bind_gs_state = v3d_gp_state_bind; 98801e04c3fSmrg 
pctx->bind_vs_state = v3d_vp_state_bind; 98901e04c3fSmrg 9909f464c52Smaya if (v3d->screen->has_csd) { 9919f464c52Smaya pctx->create_compute_state = v3d_create_compute_state; 9929f464c52Smaya pctx->delete_compute_state = v3d_shader_state_delete; 9939f464c52Smaya pctx->bind_compute_state = v3d_compute_state_bind; 9949f464c52Smaya } 9959f464c52Smaya 9969f464c52Smaya v3d->prog.cache[MESA_SHADER_VERTEX] = 9979f464c52Smaya _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); 9987ec681f3Smrg v3d->prog.cache[MESA_SHADER_GEOMETRY] = 9997ec681f3Smrg _mesa_hash_table_create(pctx, gs_cache_hash, gs_cache_compare); 10009f464c52Smaya v3d->prog.cache[MESA_SHADER_FRAGMENT] = 10019f464c52Smaya _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare); 10029f464c52Smaya v3d->prog.cache[MESA_SHADER_COMPUTE] = 10039f464c52Smaya _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare); 100401e04c3fSmrg} 100501e04c3fSmrg 100601e04c3fSmrgvoid 100701e04c3fSmrgv3d_program_fini(struct pipe_context *pctx) 100801e04c3fSmrg{ 100901e04c3fSmrg struct v3d_context *v3d = v3d_context(pctx); 101001e04c3fSmrg 10119f464c52Smaya for (int i = 0; i < MESA_SHADER_STAGES; i++) { 10129f464c52Smaya struct hash_table *cache = v3d->prog.cache[i]; 10139f464c52Smaya if (!cache) 10149f464c52Smaya continue; 101501e04c3fSmrg 10169f464c52Smaya hash_table_foreach(cache, entry) { 10179f464c52Smaya struct v3d_compiled_shader *shader = entry->data; 10189f464c52Smaya v3d_free_compiled_shader(shader); 10199f464c52Smaya _mesa_hash_table_remove(cache, entry); 10209f464c52Smaya } 102101e04c3fSmrg } 102201e04c3fSmrg 102301e04c3fSmrg v3d_bo_unreference(&v3d->prog.spill_bo); 102401e04c3fSmrg} 1025