1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014-2017 Broadcom 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include <inttypes.h> 25b8e80941Smrg#include "util/u_format.h" 26b8e80941Smrg#include "util/u_math.h" 27b8e80941Smrg#include "util/u_memory.h" 28b8e80941Smrg#include "util/ralloc.h" 29b8e80941Smrg#include "util/hash_table.h" 30b8e80941Smrg#include "util/u_upload_mgr.h" 31b8e80941Smrg#include "tgsi/tgsi_dump.h" 32b8e80941Smrg#include "tgsi/tgsi_parse.h" 33b8e80941Smrg#include "compiler/nir/nir.h" 34b8e80941Smrg#include "compiler/nir/nir_builder.h" 35b8e80941Smrg#include "nir/tgsi_to_nir.h" 36b8e80941Smrg#include "compiler/v3d_compiler.h" 37b8e80941Smrg#include "v3d_context.h" 38b8e80941Smrg#include "broadcom/cle/v3d_packet_v33_pack.h" 39b8e80941Smrg 40b8e80941Smrgstatic struct v3d_compiled_shader * 41b8e80941Smrgv3d_get_compiled_shader(struct v3d_context *v3d, 42b8e80941Smrg struct v3d_key *key, size_t key_size); 43b8e80941Smrgstatic void 44b8e80941Smrgv3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 45b8e80941Smrg struct v3d_key *key); 46b8e80941Smrg 47b8e80941Smrgstatic gl_varying_slot 48b8e80941Smrgv3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) 49b8e80941Smrg{ 50b8e80941Smrg nir_foreach_variable(var, &s->outputs) { 51b8e80941Smrg if (var->data.driver_location == driver_location) { 52b8e80941Smrg return var->data.location; 53b8e80941Smrg } 54b8e80941Smrg } 55b8e80941Smrg 56b8e80941Smrg return -1; 57b8e80941Smrg} 58b8e80941Smrg 59b8e80941Smrg/** 60b8e80941Smrg * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. 61b8e80941Smrg * 62b8e80941Smrg * A shader can have 16 of these specs, and each one of them can write up to 63b8e80941Smrg * 16 dwords. Since we allow a total of 64 transform feedback output 64b8e80941Smrg * components (not 16 vectors), we have to group the writes of multiple 65b8e80941Smrg * varyings together in a single data spec. 66b8e80941Smrg */ 67b8e80941Smrgstatic void 68b8e80941Smrgv3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, 69b8e80941Smrg const struct pipe_stream_output_info *stream_output) 70b8e80941Smrg{ 71b8e80941Smrg if (!stream_output->num_outputs) 72b8e80941Smrg return; 73b8e80941Smrg 74b8e80941Smrg struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; 75b8e80941Smrg int slot_count = 0; 76b8e80941Smrg 77b8e80941Smrg for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { 78b8e80941Smrg uint32_t buffer_offset = 0; 79b8e80941Smrg uint32_t vpm_start = slot_count; 80b8e80941Smrg 81b8e80941Smrg for (int i = 0; i < stream_output->num_outputs; i++) { 82b8e80941Smrg const struct pipe_stream_output *output = 83b8e80941Smrg &stream_output->output[i]; 84b8e80941Smrg 85b8e80941Smrg if (output->output_buffer != buffer) 86b8e80941Smrg continue; 87b8e80941Smrg 88b8e80941Smrg /* We assume that the SO outputs appear in increasing 89b8e80941Smrg * order in the buffer. 90b8e80941Smrg */ 91b8e80941Smrg assert(output->dst_offset >= buffer_offset); 92b8e80941Smrg 93b8e80941Smrg /* Pad any undefined slots in the output */ 94b8e80941Smrg for (int j = buffer_offset; j < output->dst_offset; j++) { 95b8e80941Smrg slots[slot_count] = 96b8e80941Smrg v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); 97b8e80941Smrg slot_count++; 98b8e80941Smrg buffer_offset++; 99b8e80941Smrg } 100b8e80941Smrg 101b8e80941Smrg /* Set the coordinate shader up to output the 102b8e80941Smrg * components of this varying. 103b8e80941Smrg */ 104b8e80941Smrg for (int j = 0; j < output->num_components; j++) { 105b8e80941Smrg gl_varying_slot slot = 106b8e80941Smrg v3d_get_slot_for_driver_location(so->base.ir.nir, output->register_index); 107b8e80941Smrg 108b8e80941Smrg slots[slot_count] = 109b8e80941Smrg v3d_slot_from_slot_and_component(slot, 110b8e80941Smrg output->start_component + j); 111b8e80941Smrg slot_count++; 112b8e80941Smrg buffer_offset++; 113b8e80941Smrg } 114b8e80941Smrg } 115b8e80941Smrg 116b8e80941Smrg uint32_t vpm_size = slot_count - vpm_start; 117b8e80941Smrg if (!vpm_size) 118b8e80941Smrg continue; 119b8e80941Smrg 120b8e80941Smrg uint32_t vpm_start_offset = vpm_start + 6; 121b8e80941Smrg 122b8e80941Smrg while (vpm_size) { 123b8e80941Smrg uint32_t write_size = MIN2(vpm_size, 1 << 4); 124b8e80941Smrg 125b8e80941Smrg struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { 126b8e80941Smrg /* We need the offset from the coordinate shader's VPM 127b8e80941Smrg * output block, which has the [X, Y, Z, W, Xs, Ys] 128b8e80941Smrg * values at the start. 129b8e80941Smrg */ 130b8e80941Smrg .first_shaded_vertex_value_to_output = vpm_start_offset, 131b8e80941Smrg .number_of_consecutive_vertex_values_to_output_as_32_bit_values = write_size, 132b8e80941Smrg .output_buffer_to_write_to = buffer, 133b8e80941Smrg }; 134b8e80941Smrg 135b8e80941Smrg /* GFXH-1559 */ 136b8e80941Smrg assert(unpacked.first_shaded_vertex_value_to_output != 8 || 137b8e80941Smrg so->num_tf_specs != 0); 138b8e80941Smrg 139b8e80941Smrg assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); 140b8e80941Smrg V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 141b8e80941Smrg (void *)&so->tf_specs[so->num_tf_specs], 142b8e80941Smrg &unpacked); 143b8e80941Smrg 144b8e80941Smrg /* If point size is being written by the shader, then 145b8e80941Smrg * all the VPM start offsets are shifted up by one. 146b8e80941Smrg * We won't know that until the variant is compiled, 147b8e80941Smrg * though. 148b8e80941Smrg */ 149b8e80941Smrg unpacked.first_shaded_vertex_value_to_output++; 150b8e80941Smrg 151b8e80941Smrg /* GFXH-1559 */ 152b8e80941Smrg assert(unpacked.first_shaded_vertex_value_to_output != 8 || 153b8e80941Smrg so->num_tf_specs != 0); 154b8e80941Smrg 155b8e80941Smrg V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 156b8e80941Smrg (void *)&so->tf_specs_psiz[so->num_tf_specs], 157b8e80941Smrg &unpacked); 158b8e80941Smrg so->num_tf_specs++; 159b8e80941Smrg vpm_start_offset += write_size; 160b8e80941Smrg vpm_size -= write_size; 161b8e80941Smrg } 162b8e80941Smrg so->base.stream_output.stride[buffer] = 163b8e80941Smrg stream_output->stride[buffer]; 164b8e80941Smrg } 165b8e80941Smrg 166b8e80941Smrg so->num_tf_outputs = slot_count; 167b8e80941Smrg so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, 168b8e80941Smrg slot_count); 169b8e80941Smrg memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); 170b8e80941Smrg} 171b8e80941Smrg 172b8e80941Smrgstatic int 173b8e80941Smrgtype_size(const struct glsl_type *type, bool bindless) 174b8e80941Smrg{ 175b8e80941Smrg return glsl_count_attribute_slots(type, false); 176b8e80941Smrg} 177b8e80941Smrg 178b8e80941Smrg/** 179b8e80941Smrg * Precompiles a shader variant at shader state creation time if 180b8e80941Smrg * V3D_DEBUG=precompile is set. Used for shader-db 181b8e80941Smrg * (https://gitlab.freedesktop.org/mesa/shader-db) 182b8e80941Smrg */ 183b8e80941Smrgstatic void 184b8e80941Smrgv3d_shader_precompile(struct v3d_context *v3d, 185b8e80941Smrg struct v3d_uncompiled_shader *so) 186b8e80941Smrg{ 187b8e80941Smrg nir_shader *s = so->base.ir.nir; 188b8e80941Smrg 189b8e80941Smrg if (s->info.stage == MESA_SHADER_FRAGMENT) { 190b8e80941Smrg struct v3d_fs_key key = { 191b8e80941Smrg .base.shader_state = so, 192b8e80941Smrg }; 193b8e80941Smrg 194b8e80941Smrg nir_foreach_variable(var, &s->outputs) { 195b8e80941Smrg if (var->data.location == FRAG_RESULT_COLOR) { 196b8e80941Smrg key.cbufs |= 1 << 0; 197b8e80941Smrg } else if (var->data.location >= FRAG_RESULT_DATA0) { 198b8e80941Smrg key.cbufs |= 1 << (var->data.location - 199b8e80941Smrg FRAG_RESULT_DATA0); 200b8e80941Smrg } 201b8e80941Smrg } 202b8e80941Smrg 203b8e80941Smrg v3d_setup_shared_precompile_key(so, &key.base); 204b8e80941Smrg v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 205b8e80941Smrg } else { 206b8e80941Smrg struct v3d_vs_key key = { 207b8e80941Smrg .base.shader_state = so, 208b8e80941Smrg }; 209b8e80941Smrg 210b8e80941Smrg v3d_setup_shared_precompile_key(so, &key.base); 211b8e80941Smrg 212b8e80941Smrg /* Compile VS: All outputs */ 213b8e80941Smrg nir_foreach_variable(var, &s->outputs) { 214b8e80941Smrg unsigned array_len = MAX2(glsl_get_length(var->type), 1); 215b8e80941Smrg assert(array_len == 1); 216b8e80941Smrg (void)array_len; 217b8e80941Smrg 218b8e80941Smrg int slot = var->data.location; 219b8e80941Smrg for (int i = 0; i < glsl_get_components(var->type); i++) { 220b8e80941Smrg int swiz = var->data.location_frac + i; 221b8e80941Smrg key.fs_inputs[key.num_fs_inputs++] = 222b8e80941Smrg v3d_slot_from_slot_and_component(slot, 223b8e80941Smrg swiz); 224b8e80941Smrg } 225b8e80941Smrg } 226b8e80941Smrg 227b8e80941Smrg v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 228b8e80941Smrg 229b8e80941Smrg /* Compile VS bin shader: only position (XXX: include TF) */ 230b8e80941Smrg key.is_coord = true; 231b8e80941Smrg key.num_fs_inputs = 0; 232b8e80941Smrg for (int i = 0; i < 4; i++) { 233b8e80941Smrg key.fs_inputs[key.num_fs_inputs++] = 234b8e80941Smrg v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 235b8e80941Smrg i); 236b8e80941Smrg } 237b8e80941Smrg v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 238b8e80941Smrg } 239b8e80941Smrg} 240b8e80941Smrg 241b8e80941Smrgstatic void * 242b8e80941Smrgv3d_uncompiled_shader_create(struct pipe_context *pctx, 243b8e80941Smrg enum pipe_shader_ir type, void *ir) 244b8e80941Smrg{ 245b8e80941Smrg struct v3d_context *v3d = v3d_context(pctx); 246b8e80941Smrg struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader); 247b8e80941Smrg if (!so) 248b8e80941Smrg return NULL; 249b8e80941Smrg 250b8e80941Smrg so->program_id = v3d->next_uncompiled_program_id++; 251b8e80941Smrg 252b8e80941Smrg nir_shader *s; 253b8e80941Smrg 254b8e80941Smrg if (type == PIPE_SHADER_IR_NIR) { 255b8e80941Smrg /* The backend takes ownership of the NIR shader on state 256b8e80941Smrg * creation. 257b8e80941Smrg */ 258b8e80941Smrg s = ir; 259b8e80941Smrg } else { 260b8e80941Smrg assert(type == PIPE_SHADER_IR_TGSI); 261b8e80941Smrg 262b8e80941Smrg if (V3D_DEBUG & V3D_DEBUG_TGSI) { 263b8e80941Smrg fprintf(stderr, "prog %d TGSI:\n", 264b8e80941Smrg so->program_id); 265b8e80941Smrg tgsi_dump(ir, 0); 266b8e80941Smrg fprintf(stderr, "\n"); 267b8e80941Smrg } 268b8e80941Smrg s = tgsi_to_nir(ir, pctx->screen); 269b8e80941Smrg } 270b8e80941Smrg 271b8e80941Smrg nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform; 272b8e80941Smrg if (s->info.stage == MESA_SHADER_VERTEX) 273b8e80941Smrg lower_mode &= ~(nir_var_shader_in | nir_var_shader_out); 274b8e80941Smrg NIR_PASS_V(s, nir_lower_io, lower_mode, 275b8e80941Smrg type_size, 276b8e80941Smrg (nir_lower_io_options)0); 277b8e80941Smrg 278b8e80941Smrg NIR_PASS_V(s, nir_lower_regs_to_ssa); 279b8e80941Smrg NIR_PASS_V(s, nir_normalize_cubemap_coords); 280b8e80941Smrg 281b8e80941Smrg NIR_PASS_V(s, nir_lower_load_const_to_scalar); 282b8e80941Smrg 283b8e80941Smrg v3d_optimize_nir(s); 284b8e80941Smrg 285b8e80941Smrg NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); 286b8e80941Smrg 287b8e80941Smrg /* Garbage collect dead instructions */ 288b8e80941Smrg nir_sweep(s); 289b8e80941Smrg 290b8e80941Smrg so->base.type = PIPE_SHADER_IR_NIR; 291b8e80941Smrg so->base.ir.nir = s; 292b8e80941Smrg 293b8e80941Smrg if (V3D_DEBUG & (V3D_DEBUG_NIR | 294b8e80941Smrg v3d_debug_flag_for_shader_stage(s->info.stage))) { 295b8e80941Smrg fprintf(stderr, "%s prog %d NIR:\n", 296b8e80941Smrg gl_shader_stage_name(s->info.stage), 297b8e80941Smrg so->program_id); 298b8e80941Smrg nir_print_shader(s, stderr); 299b8e80941Smrg fprintf(stderr, "\n"); 300b8e80941Smrg } 301b8e80941Smrg 302b8e80941Smrg if (V3D_DEBUG & V3D_DEBUG_PRECOMPILE) 303b8e80941Smrg v3d_shader_precompile(v3d, so); 304b8e80941Smrg 305b8e80941Smrg return so; 306b8e80941Smrg} 307b8e80941Smrg 308b8e80941Smrgstatic void 309b8e80941Smrgv3d_shader_debug_output(const char *message, void *data) 310b8e80941Smrg{ 311b8e80941Smrg struct v3d_context *v3d = data; 312b8e80941Smrg 313b8e80941Smrg pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); 314b8e80941Smrg} 315b8e80941Smrg 316b8e80941Smrgstatic void * 317b8e80941Smrgv3d_shader_state_create(struct pipe_context *pctx, 318b8e80941Smrg const struct pipe_shader_state *cso) 319b8e80941Smrg{ 320b8e80941Smrg struct v3d_uncompiled_shader *so = 321b8e80941Smrg v3d_uncompiled_shader_create(pctx, 322b8e80941Smrg cso->type, 323b8e80941Smrg (cso->type == PIPE_SHADER_IR_TGSI ? 324b8e80941Smrg (void *)cso->tokens : 325b8e80941Smrg cso->ir.nir)); 326b8e80941Smrg 327b8e80941Smrg v3d_set_transform_feedback_outputs(so, &cso->stream_output); 328b8e80941Smrg 329b8e80941Smrg return so; 330b8e80941Smrg} 331b8e80941Smrg 332b8e80941Smrgstruct v3d_compiled_shader * 333b8e80941Smrgv3d_get_compiled_shader(struct v3d_context *v3d, 334b8e80941Smrg struct v3d_key *key, 335b8e80941Smrg size_t key_size) 336b8e80941Smrg{ 337b8e80941Smrg struct v3d_uncompiled_shader *shader_state = key->shader_state; 338b8e80941Smrg nir_shader *s = shader_state->base.ir.nir; 339b8e80941Smrg 340b8e80941Smrg struct hash_table *ht = v3d->prog.cache[s->info.stage]; 341b8e80941Smrg struct hash_entry *entry = _mesa_hash_table_search(ht, key); 342b8e80941Smrg if (entry) 343b8e80941Smrg return entry->data; 344b8e80941Smrg 345b8e80941Smrg struct v3d_compiled_shader *shader = 346b8e80941Smrg rzalloc(NULL, struct v3d_compiled_shader); 347b8e80941Smrg 348b8e80941Smrg int program_id = shader_state->program_id; 349b8e80941Smrg int variant_id = 350b8e80941Smrg p_atomic_inc_return(&shader_state->compiled_variant_count); 351b8e80941Smrg uint64_t *qpu_insts; 352b8e80941Smrg uint32_t shader_size; 353b8e80941Smrg 354b8e80941Smrg qpu_insts = v3d_compile(v3d->screen->compiler, key, 355b8e80941Smrg &shader->prog_data.base, s, 356b8e80941Smrg v3d_shader_debug_output, 357b8e80941Smrg v3d, 358b8e80941Smrg program_id, variant_id, &shader_size); 359b8e80941Smrg ralloc_steal(shader, shader->prog_data.base); 360b8e80941Smrg 361b8e80941Smrg v3d_set_shader_uniform_dirty_flags(shader); 362b8e80941Smrg 363b8e80941Smrg if (shader_size) { 364b8e80941Smrg u_upload_data(v3d->state_uploader, 0, shader_size, 8, 365b8e80941Smrg qpu_insts, &shader->offset, &shader->resource); 366b8e80941Smrg } 367b8e80941Smrg 368b8e80941Smrg free(qpu_insts); 369b8e80941Smrg 370b8e80941Smrg if (ht) { 371b8e80941Smrg struct v3d_key *dup_key; 372b8e80941Smrg dup_key = ralloc_size(shader, key_size); 373b8e80941Smrg memcpy(dup_key, key, key_size); 374b8e80941Smrg _mesa_hash_table_insert(ht, dup_key, shader); 375b8e80941Smrg } 376b8e80941Smrg 377b8e80941Smrg if (shader->prog_data.base->spill_size > 378b8e80941Smrg v3d->prog.spill_size_per_thread) { 379b8e80941Smrg /* The TIDX register we use for choosing the area to access 380b8e80941Smrg * for scratch space is: (core << 6) | (qpu << 2) | thread. 381b8e80941Smrg * Even at minimum threadcount in a particular shader, that 382b8e80941Smrg * means we still multiply by qpus by 4. 383b8e80941Smrg */ 384b8e80941Smrg int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * 385b8e80941Smrg shader->prog_data.base->spill_size); 386b8e80941Smrg 387b8e80941Smrg v3d_bo_unreference(&v3d->prog.spill_bo); 388b8e80941Smrg v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, 389b8e80941Smrg total_spill_size, "spill"); 390b8e80941Smrg v3d->prog.spill_size_per_thread = 391b8e80941Smrg shader->prog_data.base->spill_size; 392b8e80941Smrg } 393b8e80941Smrg 394b8e80941Smrg return shader; 395b8e80941Smrg} 396b8e80941Smrg 397b8e80941Smrgstatic void 398b8e80941Smrgv3d_free_compiled_shader(struct v3d_compiled_shader *shader) 399b8e80941Smrg{ 400b8e80941Smrg pipe_resource_reference(&shader->resource, NULL); 401b8e80941Smrg ralloc_free(shader); 402b8e80941Smrg} 403b8e80941Smrg 404b8e80941Smrgstatic void 405b8e80941Smrgv3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, 406b8e80941Smrg struct v3d_texture_stateobj *texstate) 407b8e80941Smrg{ 408b8e80941Smrg const struct v3d_device_info *devinfo = &v3d->screen->devinfo; 409b8e80941Smrg 410b8e80941Smrg for (int i = 0; i < texstate->num_textures; i++) { 411b8e80941Smrg struct pipe_sampler_view *sampler = texstate->textures[i]; 412b8e80941Smrg struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler); 413b8e80941Smrg struct pipe_sampler_state *sampler_state = 414b8e80941Smrg texstate->samplers[i]; 415b8e80941Smrg 416b8e80941Smrg if (!sampler) 417b8e80941Smrg continue; 418b8e80941Smrg 419b8e80941Smrg key->tex[i].return_size = 420b8e80941Smrg v3d_get_tex_return_size(devinfo, 421b8e80941Smrg sampler->format, 422b8e80941Smrg sampler_state->compare_mode); 423b8e80941Smrg 424b8e80941Smrg /* For 16-bit, we set up the sampler to always return 2 425b8e80941Smrg * channels (meaning no recompiles for most statechanges), 426b8e80941Smrg * while for 32 we actually scale the returns with channels. 427b8e80941Smrg */ 428b8e80941Smrg if (key->tex[i].return_size == 16) { 429b8e80941Smrg key->tex[i].return_channels = 2; 430b8e80941Smrg } else if (devinfo->ver > 40) { 431b8e80941Smrg key->tex[i].return_channels = 4; 432b8e80941Smrg } else { 433b8e80941Smrg key->tex[i].return_channels = 434b8e80941Smrg v3d_get_tex_return_channels(devinfo, 435b8e80941Smrg sampler->format); 436b8e80941Smrg } 437b8e80941Smrg 438b8e80941Smrg if (key->tex[i].return_size == 32 && devinfo->ver < 40) { 439b8e80941Smrg memcpy(key->tex[i].swizzle, 440b8e80941Smrg v3d_sampler->swizzle, 441b8e80941Smrg sizeof(v3d_sampler->swizzle)); 442b8e80941Smrg } else { 443b8e80941Smrg /* For 16-bit returns, we let the sampler state handle 444b8e80941Smrg * the swizzle. 445b8e80941Smrg */ 446b8e80941Smrg key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 447b8e80941Smrg key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 448b8e80941Smrg key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 449b8e80941Smrg key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 450b8e80941Smrg } 451b8e80941Smrg 452b8e80941Smrg if (sampler) { 453b8e80941Smrg key->tex[i].clamp_s = 454b8e80941Smrg sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP; 455b8e80941Smrg key->tex[i].clamp_t = 456b8e80941Smrg sampler_state->wrap_t == PIPE_TEX_WRAP_CLAMP; 457b8e80941Smrg key->tex[i].clamp_r = 458b8e80941Smrg sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP; 459b8e80941Smrg } 460b8e80941Smrg } 461b8e80941Smrg} 462b8e80941Smrg 463b8e80941Smrgstatic void 464b8e80941Smrgv3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 465b8e80941Smrg struct v3d_key *key) 466b8e80941Smrg{ 467b8e80941Smrg nir_shader *s = uncompiled->base.ir.nir; 468b8e80941Smrg 469b8e80941Smrg for (int i = 0; i < s->info.num_textures; i++) { 470b8e80941Smrg key->tex[i].return_size = 16; 471b8e80941Smrg key->tex[i].return_channels = 2; 472b8e80941Smrg 473b8e80941Smrg key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 474b8e80941Smrg key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 475b8e80941Smrg key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 476b8e80941Smrg key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 477b8e80941Smrg } 478b8e80941Smrg} 479b8e80941Smrg 480b8e80941Smrgstatic void 481b8e80941Smrgv3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) 482b8e80941Smrg{ 483b8e80941Smrg struct v3d_job *job = v3d->job; 484b8e80941Smrg struct v3d_fs_key local_key; 485b8e80941Smrg struct v3d_fs_key *key = &local_key; 486b8e80941Smrg nir_shader *s = v3d->prog.bind_fs->base.ir.nir; 487b8e80941Smrg 488b8e80941Smrg if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE | 489b8e80941Smrg VC5_DIRTY_BLEND | 490b8e80941Smrg VC5_DIRTY_FRAMEBUFFER | 491b8e80941Smrg VC5_DIRTY_ZSA | 492b8e80941Smrg VC5_DIRTY_RASTERIZER | 493b8e80941Smrg VC5_DIRTY_SAMPLE_STATE | 494b8e80941Smrg VC5_DIRTY_FRAGTEX | 495b8e80941Smrg VC5_DIRTY_UNCOMPILED_FS))) { 496b8e80941Smrg return; 497b8e80941Smrg } 498b8e80941Smrg 499b8e80941Smrg memset(key, 0, sizeof(*key)); 500b8e80941Smrg v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); 501b8e80941Smrg key->base.shader_state = v3d->prog.bind_fs; 502b8e80941Smrg key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 503b8e80941Smrg key->is_points = (prim_mode == PIPE_PRIM_POINTS); 504b8e80941Smrg key->is_lines = (prim_mode >= PIPE_PRIM_LINES && 505b8e80941Smrg prim_mode <= PIPE_PRIM_LINE_STRIP); 506b8e80941Smrg key->clamp_color = v3d->rasterizer->base.clamp_fragment_color; 507b8e80941Smrg if (v3d->blend->base.logicop_enable) { 508b8e80941Smrg key->logicop_func = v3d->blend->base.logicop_func; 509b8e80941Smrg } else { 510b8e80941Smrg key->logicop_func = PIPE_LOGICOP_COPY; 511b8e80941Smrg } 512b8e80941Smrg if (job->msaa) { 513b8e80941Smrg key->msaa = v3d->rasterizer->base.multisample; 514b8e80941Smrg key->sample_coverage = (v3d->rasterizer->base.multisample && 515b8e80941Smrg v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); 516b8e80941Smrg key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; 517b8e80941Smrg key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; 518b8e80941Smrg } 519b8e80941Smrg 520b8e80941Smrg key->depth_enabled = (v3d->zsa->base.depth.enabled || 521b8e80941Smrg v3d->zsa->base.stencil[0].enabled); 522b8e80941Smrg if (v3d->zsa->base.alpha.enabled) { 523b8e80941Smrg key->alpha_test = true; 524b8e80941Smrg key->alpha_test_func = v3d->zsa->base.alpha.func; 525b8e80941Smrg } 526b8e80941Smrg 527b8e80941Smrg key->swap_color_rb = v3d->swap_color_rb; 528b8e80941Smrg 529b8e80941Smrg for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) { 530b8e80941Smrg struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; 531b8e80941Smrg if (!cbuf) 532b8e80941Smrg continue; 533b8e80941Smrg 534b8e80941Smrg /* gl_FragColor's propagation to however many bound color 535b8e80941Smrg * buffers there are means that the shader compile needs to 536b8e80941Smrg * know what buffers are present. 537b8e80941Smrg */ 538b8e80941Smrg key->cbufs |= 1 << i; 539b8e80941Smrg 540b8e80941Smrg const struct util_format_description *desc = 541b8e80941Smrg util_format_description(cbuf->format); 542b8e80941Smrg 543b8e80941Smrg if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 544b8e80941Smrg desc->channel[0].size == 32) { 545b8e80941Smrg key->f32_color_rb |= 1 << i; 546b8e80941Smrg } 547b8e80941Smrg 548b8e80941Smrg if (s->info.fs.untyped_color_outputs) { 549b8e80941Smrg if (util_format_is_pure_uint(cbuf->format)) 550b8e80941Smrg key->uint_color_rb |= 1 << i; 551b8e80941Smrg else if (util_format_is_pure_sint(cbuf->format)) 552b8e80941Smrg key->int_color_rb |= 1 << i; 553b8e80941Smrg } 554b8e80941Smrg } 555b8e80941Smrg 556b8e80941Smrg if (key->is_points) { 557b8e80941Smrg key->point_sprite_mask = 558b8e80941Smrg v3d->rasterizer->base.sprite_coord_enable; 559b8e80941Smrg key->point_coord_upper_left = 560b8e80941Smrg (v3d->rasterizer->base.sprite_coord_mode == 561b8e80941Smrg PIPE_SPRITE_COORD_UPPER_LEFT); 562b8e80941Smrg } 563b8e80941Smrg 564b8e80941Smrg key->light_twoside = v3d->rasterizer->base.light_twoside; 565b8e80941Smrg key->shade_model_flat = v3d->rasterizer->base.flatshade; 566b8e80941Smrg 567b8e80941Smrg struct v3d_compiled_shader *old_fs = v3d->prog.fs; 568b8e80941Smrg v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 569b8e80941Smrg if (v3d->prog.fs == old_fs) 570b8e80941Smrg return; 571b8e80941Smrg 572b8e80941Smrg v3d->dirty |= VC5_DIRTY_COMPILED_FS; 573b8e80941Smrg 574b8e80941Smrg if (old_fs) { 575b8e80941Smrg if (v3d->prog.fs->prog_data.fs->flat_shade_flags != 576b8e80941Smrg old_fs->prog_data.fs->flat_shade_flags) { 577b8e80941Smrg v3d->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; 578b8e80941Smrg } 579b8e80941Smrg 580b8e80941Smrg if (v3d->prog.fs->prog_data.fs->noperspective_flags != 581b8e80941Smrg old_fs->prog_data.fs->noperspective_flags) { 582b8e80941Smrg v3d->dirty |= VC5_DIRTY_NOPERSPECTIVE_FLAGS; 583b8e80941Smrg } 584b8e80941Smrg 585b8e80941Smrg if (v3d->prog.fs->prog_data.fs->centroid_flags != 586b8e80941Smrg old_fs->prog_data.fs->centroid_flags) { 587b8e80941Smrg v3d->dirty |= VC5_DIRTY_CENTROID_FLAGS; 588b8e80941Smrg } 589b8e80941Smrg } 590b8e80941Smrg 591b8e80941Smrg if (old_fs && memcmp(v3d->prog.fs->prog_data.fs->input_slots, 592b8e80941Smrg old_fs->prog_data.fs->input_slots, 593b8e80941Smrg sizeof(v3d->prog.fs->prog_data.fs->input_slots))) { 594b8e80941Smrg v3d->dirty |= VC5_DIRTY_FS_INPUTS; 595b8e80941Smrg } 596b8e80941Smrg} 597b8e80941Smrg 598b8e80941Smrgstatic void 599b8e80941Smrgv3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) 600b8e80941Smrg{ 601b8e80941Smrg struct v3d_vs_key local_key; 602b8e80941Smrg struct v3d_vs_key *key = &local_key; 603b8e80941Smrg 604b8e80941Smrg if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE | 605b8e80941Smrg VC5_DIRTY_RASTERIZER | 606b8e80941Smrg VC5_DIRTY_VERTTEX | 607b8e80941Smrg VC5_DIRTY_VTXSTATE | 608b8e80941Smrg VC5_DIRTY_UNCOMPILED_VS | 609b8e80941Smrg VC5_DIRTY_FS_INPUTS))) { 610b8e80941Smrg return; 611b8e80941Smrg } 612b8e80941Smrg 613b8e80941Smrg memset(key, 0, sizeof(*key)); 614b8e80941Smrg v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); 615b8e80941Smrg key->base.shader_state = v3d->prog.bind_vs; 616b8e80941Smrg key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 617b8e80941Smrg key->num_fs_inputs = v3d->prog.fs->prog_data.fs->num_inputs; 618b8e80941Smrg STATIC_ASSERT(sizeof(key->fs_inputs) == 619b8e80941Smrg sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 620b8e80941Smrg memcpy(key->fs_inputs, v3d->prog.fs->prog_data.fs->input_slots, 621b8e80941Smrg sizeof(key->fs_inputs)); 622b8e80941Smrg key->clamp_color = v3d->rasterizer->base.clamp_vertex_color; 623b8e80941Smrg 624b8e80941Smrg key->per_vertex_point_size = 625b8e80941Smrg (prim_mode == PIPE_PRIM_POINTS && 626b8e80941Smrg v3d->rasterizer->base.point_size_per_vertex); 627b8e80941Smrg 628b8e80941Smrg struct v3d_compiled_shader *vs = 629b8e80941Smrg v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 630b8e80941Smrg if (vs != v3d->prog.vs) { 631b8e80941Smrg v3d->prog.vs = vs; 632b8e80941Smrg v3d->dirty |= VC5_DIRTY_COMPILED_VS; 633b8e80941Smrg } 634b8e80941Smrg 635b8e80941Smrg key->is_coord = true; 636b8e80941Smrg /* Coord shaders only output varyings used by transform feedback. */ 637b8e80941Smrg struct v3d_uncompiled_shader *shader_state = key->base.shader_state; 638b8e80941Smrg memcpy(key->fs_inputs, shader_state->tf_outputs, 639b8e80941Smrg sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); 640b8e80941Smrg if (shader_state->num_tf_outputs < key->num_fs_inputs) { 641b8e80941Smrg memset(&key->fs_inputs[shader_state->num_tf_outputs], 642b8e80941Smrg 0, 643b8e80941Smrg sizeof(*key->fs_inputs) * (key->num_fs_inputs - 644b8e80941Smrg shader_state->num_tf_outputs)); 645b8e80941Smrg } 646b8e80941Smrg key->num_fs_inputs = shader_state->num_tf_outputs; 647b8e80941Smrg 648b8e80941Smrg struct v3d_compiled_shader *cs = 649b8e80941Smrg v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 650b8e80941Smrg if (cs != v3d->prog.cs) { 651b8e80941Smrg v3d->prog.cs = cs; 652b8e80941Smrg v3d->dirty |= VC5_DIRTY_COMPILED_CS; 653b8e80941Smrg } 654b8e80941Smrg} 655b8e80941Smrg 656b8e80941Smrgvoid 657b8e80941Smrgv3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode) 658b8e80941Smrg{ 659b8e80941Smrg v3d_update_compiled_fs(v3d, prim_mode); 660b8e80941Smrg v3d_update_compiled_vs(v3d, prim_mode); 661b8e80941Smrg} 662b8e80941Smrg 663b8e80941Smrgvoid 664b8e80941Smrgv3d_update_compiled_cs(struct v3d_context *v3d) 665b8e80941Smrg{ 666b8e80941Smrg struct v3d_key local_key; 667b8e80941Smrg struct v3d_key *key = &local_key; 668b8e80941Smrg 669b8e80941Smrg if (!(v3d->dirty & (~0 | /* XXX */ 670b8e80941Smrg VC5_DIRTY_VERTTEX | 671b8e80941Smrg VC5_DIRTY_UNCOMPILED_FS))) { 672b8e80941Smrg return; 673b8e80941Smrg } 674b8e80941Smrg 675b8e80941Smrg memset(key, 0, sizeof(*key)); 676b8e80941Smrg v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]); 677b8e80941Smrg key->shader_state = v3d->prog.bind_compute; 678b8e80941Smrg 679b8e80941Smrg struct v3d_compiled_shader *cs = 680b8e80941Smrg v3d_get_compiled_shader(v3d, key, sizeof(*key)); 681b8e80941Smrg if (cs != v3d->prog.compute) { 682b8e80941Smrg v3d->prog.compute = cs; 683b8e80941Smrg v3d->dirty |= VC5_DIRTY_COMPILED_CS; /* XXX */ 684b8e80941Smrg } 685b8e80941Smrg} 686b8e80941Smrg 687b8e80941Smrgstatic uint32_t 688b8e80941Smrgfs_cache_hash(const void *key) 689b8e80941Smrg{ 690b8e80941Smrg return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); 691b8e80941Smrg} 692b8e80941Smrg 693b8e80941Smrgstatic uint32_t 694b8e80941Smrgvs_cache_hash(const void *key) 695b8e80941Smrg{ 696b8e80941Smrg return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); 697b8e80941Smrg} 698b8e80941Smrg 699b8e80941Smrgstatic uint32_t 700b8e80941Smrgcs_cache_hash(const void *key) 701b8e80941Smrg{ 702b8e80941Smrg return _mesa_hash_data(key, sizeof(struct v3d_key)); 703b8e80941Smrg} 704b8e80941Smrg 705b8e80941Smrgstatic bool 706b8e80941Smrgfs_cache_compare(const void *key1, const void *key2) 707b8e80941Smrg{ 708b8e80941Smrg return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; 709b8e80941Smrg} 710b8e80941Smrg 711b8e80941Smrgstatic bool 712b8e80941Smrgvs_cache_compare(const void *key1, const void *key2) 713b8e80941Smrg{ 714b8e80941Smrg return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; 715b8e80941Smrg} 716b8e80941Smrg 717b8e80941Smrgstatic bool 718b8e80941Smrgcs_cache_compare(const void *key1, const void *key2) 719b8e80941Smrg{ 720b8e80941Smrg return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; 721b8e80941Smrg} 722b8e80941Smrg 723b8e80941Smrgstatic void 724b8e80941Smrgv3d_shader_state_delete(struct pipe_context *pctx, void *hwcso) 725b8e80941Smrg{ 726b8e80941Smrg struct v3d_context *v3d = v3d_context(pctx); 727b8e80941Smrg struct v3d_uncompiled_shader *so = hwcso; 728b8e80941Smrg nir_shader *s = so->base.ir.nir; 729b8e80941Smrg 730b8e80941Smrg hash_table_foreach(v3d->prog.cache[s->info.stage], entry) { 731b8e80941Smrg const struct v3d_key *key = entry->key; 732b8e80941Smrg struct v3d_compiled_shader *shader = entry->data; 733b8e80941Smrg 734b8e80941Smrg if (key->shader_state != so) 735b8e80941Smrg continue; 736b8e80941Smrg 737b8e80941Smrg if (v3d->prog.fs == shader) 738b8e80941Smrg v3d->prog.fs = NULL; 739b8e80941Smrg if (v3d->prog.vs == shader) 740b8e80941Smrg v3d->prog.vs = NULL; 741b8e80941Smrg if (v3d->prog.cs == shader) 742b8e80941Smrg v3d->prog.cs = NULL; 743b8e80941Smrg if (v3d->prog.compute == shader) 744b8e80941Smrg v3d->prog.compute = NULL; 745b8e80941Smrg 746b8e80941Smrg _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry); 747b8e80941Smrg v3d_free_compiled_shader(shader); 748b8e80941Smrg } 749b8e80941Smrg 750b8e80941Smrg ralloc_free(so->base.ir.nir); 751b8e80941Smrg free(so); 752b8e80941Smrg} 753b8e80941Smrg 754b8e80941Smrgstatic void 755b8e80941Smrgv3d_fp_state_bind(struct pipe_context *pctx, void *hwcso) 756b8e80941Smrg{ 757b8e80941Smrg struct v3d_context *v3d = v3d_context(pctx); 758b8e80941Smrg v3d->prog.bind_fs = hwcso; 759b8e80941Smrg v3d->dirty |= VC5_DIRTY_UNCOMPILED_FS; 760b8e80941Smrg} 761b8e80941Smrg 762b8e80941Smrgstatic void 763b8e80941Smrgv3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) 764b8e80941Smrg{ 765b8e80941Smrg struct v3d_context *v3d = v3d_context(pctx); 766b8e80941Smrg v3d->prog.bind_vs = hwcso; 767b8e80941Smrg v3d->dirty |= VC5_DIRTY_UNCOMPILED_VS; 768b8e80941Smrg} 769b8e80941Smrg 770b8e80941Smrgstatic void 771b8e80941Smrgv3d_compute_state_bind(struct pipe_context *pctx, void *state) 772b8e80941Smrg{ 773b8e80941Smrg struct v3d_context *v3d = v3d_context(pctx); 774b8e80941Smrg 775b8e80941Smrg v3d->prog.bind_compute = state; 776b8e80941Smrg} 777b8e80941Smrg 778b8e80941Smrgstatic void * 779b8e80941Smrgv3d_create_compute_state(struct pipe_context *pctx, 780b8e80941Smrg const struct pipe_compute_state *cso) 781b8e80941Smrg{ 782b8e80941Smrg return v3d_uncompiled_shader_create(pctx, cso->ir_type, 783b8e80941Smrg (void *)cso->prog); 784b8e80941Smrg} 785b8e80941Smrg 786b8e80941Smrgvoid 787b8e80941Smrgv3d_program_init(struct pipe_context *pctx) 788b8e80941Smrg{ 789b8e80941Smrg struct v3d_context *v3d = v3d_context(pctx); 790b8e80941Smrg 791b8e80941Smrg pctx->create_vs_state = v3d_shader_state_create; 792b8e80941Smrg pctx->delete_vs_state = v3d_shader_state_delete; 793b8e80941Smrg 794b8e80941Smrg pctx->create_fs_state = v3d_shader_state_create; 795b8e80941Smrg pctx->delete_fs_state = v3d_shader_state_delete; 796b8e80941Smrg 797b8e80941Smrg pctx->bind_fs_state = v3d_fp_state_bind; 798b8e80941Smrg pctx->bind_vs_state = v3d_vp_state_bind; 799b8e80941Smrg 800b8e80941Smrg if (v3d->screen->has_csd) { 801b8e80941Smrg pctx->create_compute_state = v3d_create_compute_state; 802b8e80941Smrg pctx->delete_compute_state = v3d_shader_state_delete; 803b8e80941Smrg pctx->bind_compute_state = v3d_compute_state_bind; 804b8e80941Smrg } 805b8e80941Smrg 806b8e80941Smrg v3d->prog.cache[MESA_SHADER_VERTEX] = 807b8e80941Smrg _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); 808b8e80941Smrg v3d->prog.cache[MESA_SHADER_FRAGMENT] = 809b8e80941Smrg _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare); 810b8e80941Smrg v3d->prog.cache[MESA_SHADER_COMPUTE] = 811b8e80941Smrg _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare); 812b8e80941Smrg} 813b8e80941Smrg 814b8e80941Smrgvoid 815b8e80941Smrgv3d_program_fini(struct pipe_context *pctx) 816b8e80941Smrg{ 817b8e80941Smrg struct v3d_context *v3d = v3d_context(pctx); 818b8e80941Smrg 819b8e80941Smrg for (int i = 0; i < MESA_SHADER_STAGES; i++) { 820b8e80941Smrg struct hash_table *cache = v3d->prog.cache[i]; 821b8e80941Smrg if (!cache) 822b8e80941Smrg continue; 823b8e80941Smrg 824b8e80941Smrg hash_table_foreach(cache, entry) { 825b8e80941Smrg struct v3d_compiled_shader *shader = entry->data; 826b8e80941Smrg v3d_free_compiled_shader(shader); 827b8e80941Smrg _mesa_hash_table_remove(cache, entry); 828b8e80941Smrg } 829b8e80941Smrg } 830b8e80941Smrg 831b8e80941Smrg v3d_bo_unreference(&v3d->prog.spill_bo); 832b8e80941Smrg} 833