1/* 2 * Copyright © 2014-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <inttypes.h> 25#include "util/u_format.h" 26#include "util/u_math.h" 27#include "util/u_memory.h" 28#include "util/ralloc.h" 29#include "util/hash_table.h" 30#include "util/u_upload_mgr.h" 31#include "tgsi/tgsi_dump.h" 32#include "tgsi/tgsi_parse.h" 33#include "compiler/nir/nir.h" 34#include "compiler/nir/nir_builder.h" 35#include "nir/tgsi_to_nir.h" 36#include "compiler/v3d_compiler.h" 37#include "v3d_context.h" 38#include "broadcom/cle/v3d_packet_v33_pack.h" 39 40static struct v3d_compiled_shader * 41v3d_get_compiled_shader(struct v3d_context *v3d, 42 struct v3d_key *key, size_t key_size); 43static void 44v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 45 struct v3d_key *key); 46 47static gl_varying_slot 48v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) 49{ 50 nir_foreach_variable(var, &s->outputs) { 51 if (var->data.driver_location == driver_location) { 52 return var->data.location; 53 } 54 } 55 56 return -1; 57} 58 59/** 60 * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. 61 * 62 * A shader can have 16 of these specs, and each one of them can write up to 63 * 16 dwords. Since we allow a total of 64 transform feedback output 64 * components (not 16 vectors), we have to group the writes of multiple 65 * varyings together in a single data spec. 66 */ 67static void 68v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, 69 const struct pipe_stream_output_info *stream_output) 70{ 71 if (!stream_output->num_outputs) 72 return; 73 74 struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; 75 int slot_count = 0; 76 77 for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { 78 uint32_t buffer_offset = 0; 79 uint32_t vpm_start = slot_count; 80 81 for (int i = 0; i < stream_output->num_outputs; i++) { 82 const struct pipe_stream_output *output = 83 &stream_output->output[i]; 84 85 if (output->output_buffer != buffer) 86 continue; 87 88 /* We assume that the SO outputs appear in increasing 89 * order in the buffer. 90 */ 91 assert(output->dst_offset >= buffer_offset); 92 93 /* Pad any undefined slots in the output */ 94 for (int j = buffer_offset; j < output->dst_offset; j++) { 95 slots[slot_count] = 96 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); 97 slot_count++; 98 buffer_offset++; 99 } 100 101 /* Set the coordinate shader up to output the 102 * components of this varying. 103 */ 104 for (int j = 0; j < output->num_components; j++) { 105 gl_varying_slot slot = 106 v3d_get_slot_for_driver_location(so->base.ir.nir, output->register_index); 107 108 slots[slot_count] = 109 v3d_slot_from_slot_and_component(slot, 110 output->start_component + j); 111 slot_count++; 112 buffer_offset++; 113 } 114 } 115 116 uint32_t vpm_size = slot_count - vpm_start; 117 if (!vpm_size) 118 continue; 119 120 uint32_t vpm_start_offset = vpm_start + 6; 121 122 while (vpm_size) { 123 uint32_t write_size = MIN2(vpm_size, 1 << 4); 124 125 struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { 126 /* We need the offset from the coordinate shader's VPM 127 * output block, which has the [X, Y, Z, W, Xs, Ys] 128 * values at the start. 129 */ 130 .first_shaded_vertex_value_to_output = vpm_start_offset, 131 .number_of_consecutive_vertex_values_to_output_as_32_bit_values = write_size, 132 .output_buffer_to_write_to = buffer, 133 }; 134 135 /* GFXH-1559 */ 136 assert(unpacked.first_shaded_vertex_value_to_output != 8 || 137 so->num_tf_specs != 0); 138 139 assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); 140 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 141 (void *)&so->tf_specs[so->num_tf_specs], 142 &unpacked); 143 144 /* If point size is being written by the shader, then 145 * all the VPM start offsets are shifted up by one. 146 * We won't know that until the variant is compiled, 147 * though. 148 */ 149 unpacked.first_shaded_vertex_value_to_output++; 150 151 /* GFXH-1559 */ 152 assert(unpacked.first_shaded_vertex_value_to_output != 8 || 153 so->num_tf_specs != 0); 154 155 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 156 (void *)&so->tf_specs_psiz[so->num_tf_specs], 157 &unpacked); 158 so->num_tf_specs++; 159 vpm_start_offset += write_size; 160 vpm_size -= write_size; 161 } 162 so->base.stream_output.stride[buffer] = 163 stream_output->stride[buffer]; 164 } 165 166 so->num_tf_outputs = slot_count; 167 so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, 168 slot_count); 169 memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); 170} 171 172static int 173type_size(const struct glsl_type *type, bool bindless) 174{ 175 return glsl_count_attribute_slots(type, false); 176} 177 178/** 179 * Precompiles a shader variant at shader state creation time if 180 * V3D_DEBUG=precompile is set. Used for shader-db 181 * (https://gitlab.freedesktop.org/mesa/shader-db) 182 */ 183static void 184v3d_shader_precompile(struct v3d_context *v3d, 185 struct v3d_uncompiled_shader *so) 186{ 187 nir_shader *s = so->base.ir.nir; 188 189 if (s->info.stage == MESA_SHADER_FRAGMENT) { 190 struct v3d_fs_key key = { 191 .base.shader_state = so, 192 }; 193 194 nir_foreach_variable(var, &s->outputs) { 195 if (var->data.location == FRAG_RESULT_COLOR) { 196 key.cbufs |= 1 << 0; 197 } else if (var->data.location >= FRAG_RESULT_DATA0) { 198 key.cbufs |= 1 << (var->data.location - 199 FRAG_RESULT_DATA0); 200 } 201 } 202 203 v3d_setup_shared_precompile_key(so, &key.base); 204 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 205 } else { 206 struct v3d_vs_key key = { 207 .base.shader_state = so, 208 }; 209 210 v3d_setup_shared_precompile_key(so, &key.base); 211 212 /* Compile VS: All outputs */ 213 nir_foreach_variable(var, &s->outputs) { 214 unsigned array_len = MAX2(glsl_get_length(var->type), 1); 215 assert(array_len == 1); 216 (void)array_len; 217 218 int slot = var->data.location; 219 for (int i = 0; i < glsl_get_components(var->type); i++) { 220 int swiz = var->data.location_frac + i; 221 key.fs_inputs[key.num_fs_inputs++] = 222 v3d_slot_from_slot_and_component(slot, 223 swiz); 224 } 225 } 226 227 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 228 229 /* Compile VS bin shader: only position (XXX: include TF) */ 230 key.is_coord = true; 231 key.num_fs_inputs = 0; 232 for (int i = 0; i < 4; i++) { 233 key.fs_inputs[key.num_fs_inputs++] = 234 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 235 i); 236 } 237 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 238 } 239} 240 241static void * 242v3d_uncompiled_shader_create(struct pipe_context *pctx, 243 enum pipe_shader_ir type, void *ir) 244{ 245 struct v3d_context *v3d = v3d_context(pctx); 246 struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader); 247 if (!so) 248 return NULL; 249 250 so->program_id = v3d->next_uncompiled_program_id++; 251 252 nir_shader *s; 253 254 if (type == PIPE_SHADER_IR_NIR) { 255 /* The backend takes ownership of the NIR shader on state 256 * creation. 257 */ 258 s = ir; 259 } else { 260 assert(type == PIPE_SHADER_IR_TGSI); 261 262 if (V3D_DEBUG & V3D_DEBUG_TGSI) { 263 fprintf(stderr, "prog %d TGSI:\n", 264 so->program_id); 265 tgsi_dump(ir, 0); 266 fprintf(stderr, "\n"); 267 } 268 s = tgsi_to_nir(ir, pctx->screen); 269 } 270 271 nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform; 272 if (s->info.stage == MESA_SHADER_VERTEX) 273 lower_mode &= ~(nir_var_shader_in | nir_var_shader_out); 274 NIR_PASS_V(s, nir_lower_io, lower_mode, 275 type_size, 276 (nir_lower_io_options)0); 277 278 NIR_PASS_V(s, nir_lower_regs_to_ssa); 279 NIR_PASS_V(s, nir_normalize_cubemap_coords); 280 281 NIR_PASS_V(s, nir_lower_load_const_to_scalar); 282 283 v3d_optimize_nir(s); 284 285 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); 286 287 /* Garbage collect dead instructions */ 288 nir_sweep(s); 289 290 so->base.type = PIPE_SHADER_IR_NIR; 291 so->base.ir.nir = s; 292 293 if (V3D_DEBUG & (V3D_DEBUG_NIR | 294 v3d_debug_flag_for_shader_stage(s->info.stage))) { 295 fprintf(stderr, "%s prog %d NIR:\n", 296 gl_shader_stage_name(s->info.stage), 297 so->program_id); 298 nir_print_shader(s, stderr); 299 fprintf(stderr, "\n"); 300 } 301 302 if (V3D_DEBUG & V3D_DEBUG_PRECOMPILE) 303 v3d_shader_precompile(v3d, so); 304 305 return so; 306} 307 308static void 309v3d_shader_debug_output(const char *message, void *data) 310{ 311 struct v3d_context *v3d = data; 312 313 pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); 314} 315 316static void * 317v3d_shader_state_create(struct pipe_context *pctx, 318 const struct pipe_shader_state *cso) 319{ 320 struct v3d_uncompiled_shader *so = 321 v3d_uncompiled_shader_create(pctx, 322 cso->type, 323 (cso->type == PIPE_SHADER_IR_TGSI ? 324 (void *)cso->tokens : 325 cso->ir.nir)); 326 327 v3d_set_transform_feedback_outputs(so, &cso->stream_output); 328 329 return so; 330} 331 332struct v3d_compiled_shader * 333v3d_get_compiled_shader(struct v3d_context *v3d, 334 struct v3d_key *key, 335 size_t key_size) 336{ 337 struct v3d_uncompiled_shader *shader_state = key->shader_state; 338 nir_shader *s = shader_state->base.ir.nir; 339 340 struct hash_table *ht = v3d->prog.cache[s->info.stage]; 341 struct hash_entry *entry = _mesa_hash_table_search(ht, key); 342 if (entry) 343 return entry->data; 344 345 struct v3d_compiled_shader *shader = 346 rzalloc(NULL, struct v3d_compiled_shader); 347 348 int program_id = shader_state->program_id; 349 int variant_id = 350 p_atomic_inc_return(&shader_state->compiled_variant_count); 351 uint64_t *qpu_insts; 352 uint32_t shader_size; 353 354 qpu_insts = v3d_compile(v3d->screen->compiler, key, 355 &shader->prog_data.base, s, 356 v3d_shader_debug_output, 357 v3d, 358 program_id, variant_id, &shader_size); 359 ralloc_steal(shader, shader->prog_data.base); 360 361 v3d_set_shader_uniform_dirty_flags(shader); 362 363 if (shader_size) { 364 u_upload_data(v3d->state_uploader, 0, shader_size, 8, 365 qpu_insts, &shader->offset, &shader->resource); 366 } 367 368 free(qpu_insts); 369 370 if (ht) { 371 struct v3d_key *dup_key; 372 dup_key = ralloc_size(shader, key_size); 373 memcpy(dup_key, key, key_size); 374 _mesa_hash_table_insert(ht, dup_key, shader); 375 } 376 377 if (shader->prog_data.base->spill_size > 378 v3d->prog.spill_size_per_thread) { 379 /* The TIDX register we use for choosing the area to access 380 * for scratch space is: (core << 6) | (qpu << 2) | thread. 381 * Even at minimum threadcount in a particular shader, that 382 * means we still multiply by qpus by 4. 383 */ 384 int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * 385 shader->prog_data.base->spill_size); 386 387 v3d_bo_unreference(&v3d->prog.spill_bo); 388 v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, 389 total_spill_size, "spill"); 390 v3d->prog.spill_size_per_thread = 391 shader->prog_data.base->spill_size; 392 } 393 394 return shader; 395} 396 397static void 398v3d_free_compiled_shader(struct v3d_compiled_shader *shader) 399{ 400 pipe_resource_reference(&shader->resource, NULL); 401 ralloc_free(shader); 402} 403 404static void 405v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, 406 struct v3d_texture_stateobj *texstate) 407{ 408 const struct v3d_device_info *devinfo = &v3d->screen->devinfo; 409 410 for (int i = 0; i < texstate->num_textures; i++) { 411 struct pipe_sampler_view *sampler = texstate->textures[i]; 412 struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler); 413 struct pipe_sampler_state *sampler_state = 414 texstate->samplers[i]; 415 416 if (!sampler) 417 continue; 418 419 key->tex[i].return_size = 420 v3d_get_tex_return_size(devinfo, 421 sampler->format, 422 sampler_state->compare_mode); 423 424 /* For 16-bit, we set up the sampler to always return 2 425 * channels (meaning no recompiles for most statechanges), 426 * while for 32 we actually scale the returns with channels. 427 */ 428 if (key->tex[i].return_size == 16) { 429 key->tex[i].return_channels = 2; 430 } else if (devinfo->ver > 40) { 431 key->tex[i].return_channels = 4; 432 } else { 433 key->tex[i].return_channels = 434 v3d_get_tex_return_channels(devinfo, 435 sampler->format); 436 } 437 438 if (key->tex[i].return_size == 32 && devinfo->ver < 40) { 439 memcpy(key->tex[i].swizzle, 440 v3d_sampler->swizzle, 441 sizeof(v3d_sampler->swizzle)); 442 } else { 443 /* For 16-bit returns, we let the sampler state handle 444 * the swizzle. 445 */ 446 key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 447 key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 448 key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 449 key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 450 } 451 452 if (sampler) { 453 key->tex[i].clamp_s = 454 sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP; 455 key->tex[i].clamp_t = 456 sampler_state->wrap_t == PIPE_TEX_WRAP_CLAMP; 457 key->tex[i].clamp_r = 458 sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP; 459 } 460 } 461} 462 463static void 464v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 465 struct v3d_key *key) 466{ 467 nir_shader *s = uncompiled->base.ir.nir; 468 469 for (int i = 0; i < s->info.num_textures; i++) { 470 key->tex[i].return_size = 16; 471 key->tex[i].return_channels = 2; 472 473 key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 474 key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 475 key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 476 key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 477 } 478} 479 480static void 481v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) 482{ 483 struct v3d_job *job = v3d->job; 484 struct v3d_fs_key local_key; 485 struct v3d_fs_key *key = &local_key; 486 nir_shader *s = v3d->prog.bind_fs->base.ir.nir; 487 488 if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE | 489 VC5_DIRTY_BLEND | 490 VC5_DIRTY_FRAMEBUFFER | 491 VC5_DIRTY_ZSA | 492 VC5_DIRTY_RASTERIZER | 493 VC5_DIRTY_SAMPLE_STATE | 494 VC5_DIRTY_FRAGTEX | 495 VC5_DIRTY_UNCOMPILED_FS))) { 496 return; 497 } 498 499 memset(key, 0, sizeof(*key)); 500 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); 501 key->base.shader_state = v3d->prog.bind_fs; 502 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 503 key->is_points = (prim_mode == PIPE_PRIM_POINTS); 504 key->is_lines = (prim_mode >= PIPE_PRIM_LINES && 505 prim_mode <= PIPE_PRIM_LINE_STRIP); 506 key->clamp_color = v3d->rasterizer->base.clamp_fragment_color; 507 if (v3d->blend->base.logicop_enable) { 508 key->logicop_func = v3d->blend->base.logicop_func; 509 } else { 510 key->logicop_func = PIPE_LOGICOP_COPY; 511 } 512 if (job->msaa) { 513 key->msaa = v3d->rasterizer->base.multisample; 514 key->sample_coverage = (v3d->rasterizer->base.multisample && 515 v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); 516 key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; 517 key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; 518 } 519 520 key->depth_enabled = (v3d->zsa->base.depth.enabled || 521 v3d->zsa->base.stencil[0].enabled); 522 if (v3d->zsa->base.alpha.enabled) { 523 key->alpha_test = true; 524 key->alpha_test_func = v3d->zsa->base.alpha.func; 525 } 526 527 key->swap_color_rb = v3d->swap_color_rb; 528 529 for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) { 530 struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; 531 if (!cbuf) 532 continue; 533 534 /* gl_FragColor's propagation to however many bound color 535 * buffers there are means that the shader compile needs to 536 * know what buffers are present. 537 */ 538 key->cbufs |= 1 << i; 539 540 const struct util_format_description *desc = 541 util_format_description(cbuf->format); 542 543 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 544 desc->channel[0].size == 32) { 545 key->f32_color_rb |= 1 << i; 546 } 547 548 if (s->info.fs.untyped_color_outputs) { 549 if (util_format_is_pure_uint(cbuf->format)) 550 key->uint_color_rb |= 1 << i; 551 else if (util_format_is_pure_sint(cbuf->format)) 552 key->int_color_rb |= 1 << i; 553 } 554 } 555 556 if (key->is_points) { 557 key->point_sprite_mask = 558 v3d->rasterizer->base.sprite_coord_enable; 559 key->point_coord_upper_left = 560 (v3d->rasterizer->base.sprite_coord_mode == 561 PIPE_SPRITE_COORD_UPPER_LEFT); 562 } 563 564 key->light_twoside = v3d->rasterizer->base.light_twoside; 565 key->shade_model_flat = v3d->rasterizer->base.flatshade; 566 567 struct v3d_compiled_shader *old_fs = v3d->prog.fs; 568 v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 569 if (v3d->prog.fs == old_fs) 570 return; 571 572 v3d->dirty |= VC5_DIRTY_COMPILED_FS; 573 574 if (old_fs) { 575 if (v3d->prog.fs->prog_data.fs->flat_shade_flags != 576 old_fs->prog_data.fs->flat_shade_flags) { 577 v3d->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; 578 } 579 580 if (v3d->prog.fs->prog_data.fs->noperspective_flags != 581 old_fs->prog_data.fs->noperspective_flags) { 582 v3d->dirty |= VC5_DIRTY_NOPERSPECTIVE_FLAGS; 583 } 584 585 if (v3d->prog.fs->prog_data.fs->centroid_flags != 586 old_fs->prog_data.fs->centroid_flags) { 587 v3d->dirty |= VC5_DIRTY_CENTROID_FLAGS; 588 } 589 } 590 591 if (old_fs && memcmp(v3d->prog.fs->prog_data.fs->input_slots, 592 old_fs->prog_data.fs->input_slots, 593 sizeof(v3d->prog.fs->prog_data.fs->input_slots))) { 594 v3d->dirty |= VC5_DIRTY_FS_INPUTS; 595 } 596} 597 598static void 599v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) 600{ 601 struct v3d_vs_key local_key; 602 struct v3d_vs_key *key = &local_key; 603 604 if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE | 605 VC5_DIRTY_RASTERIZER | 606 VC5_DIRTY_VERTTEX | 607 VC5_DIRTY_VTXSTATE | 608 VC5_DIRTY_UNCOMPILED_VS | 609 VC5_DIRTY_FS_INPUTS))) { 610 return; 611 } 612 613 memset(key, 0, sizeof(*key)); 614 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); 615 key->base.shader_state = v3d->prog.bind_vs; 616 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 617 key->num_fs_inputs = v3d->prog.fs->prog_data.fs->num_inputs; 618 STATIC_ASSERT(sizeof(key->fs_inputs) == 619 sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 620 memcpy(key->fs_inputs, v3d->prog.fs->prog_data.fs->input_slots, 621 sizeof(key->fs_inputs)); 622 key->clamp_color = v3d->rasterizer->base.clamp_vertex_color; 623 624 key->per_vertex_point_size = 625 (prim_mode == PIPE_PRIM_POINTS && 626 v3d->rasterizer->base.point_size_per_vertex); 627 628 struct v3d_compiled_shader *vs = 629 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 630 if (vs != v3d->prog.vs) { 631 v3d->prog.vs = vs; 632 v3d->dirty |= VC5_DIRTY_COMPILED_VS; 633 } 634 635 key->is_coord = true; 636 /* Coord shaders only output varyings used by transform feedback. */ 637 struct v3d_uncompiled_shader *shader_state = key->base.shader_state; 638 memcpy(key->fs_inputs, shader_state->tf_outputs, 639 sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); 640 if (shader_state->num_tf_outputs < key->num_fs_inputs) { 641 memset(&key->fs_inputs[shader_state->num_tf_outputs], 642 0, 643 sizeof(*key->fs_inputs) * (key->num_fs_inputs - 644 shader_state->num_tf_outputs)); 645 } 646 key->num_fs_inputs = shader_state->num_tf_outputs; 647 648 struct v3d_compiled_shader *cs = 649 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 650 if (cs != v3d->prog.cs) { 651 v3d->prog.cs = cs; 652 v3d->dirty |= VC5_DIRTY_COMPILED_CS; 653 } 654} 655 656void 657v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode) 658{ 659 v3d_update_compiled_fs(v3d, prim_mode); 660 v3d_update_compiled_vs(v3d, prim_mode); 661} 662 663void 664v3d_update_compiled_cs(struct v3d_context *v3d) 665{ 666 struct v3d_key local_key; 667 struct v3d_key *key = &local_key; 668 669 if (!(v3d->dirty & (~0 | /* XXX */ 670 VC5_DIRTY_VERTTEX | 671 VC5_DIRTY_UNCOMPILED_FS))) { 672 return; 673 } 674 675 memset(key, 0, sizeof(*key)); 676 v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]); 677 key->shader_state = v3d->prog.bind_compute; 678 679 struct v3d_compiled_shader *cs = 680 v3d_get_compiled_shader(v3d, key, sizeof(*key)); 681 if (cs != v3d->prog.compute) { 682 v3d->prog.compute = cs; 683 v3d->dirty |= VC5_DIRTY_COMPILED_CS; /* XXX */ 684 } 685} 686 687static uint32_t 688fs_cache_hash(const void *key) 689{ 690 return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); 691} 692 693static uint32_t 694vs_cache_hash(const void *key) 695{ 696 return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); 697} 698 699static uint32_t 700cs_cache_hash(const void *key) 701{ 702 return _mesa_hash_data(key, sizeof(struct v3d_key)); 703} 704 705static bool 706fs_cache_compare(const void *key1, const void *key2) 707{ 708 return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; 709} 710 711static bool 712vs_cache_compare(const void *key1, const void *key2) 713{ 714 return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; 715} 716 717static bool 718cs_cache_compare(const void *key1, const void *key2) 719{ 720 return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; 721} 722 723static void 724v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso) 725{ 726 struct v3d_context *v3d = v3d_context(pctx); 727 struct v3d_uncompiled_shader *so = hwcso; 728 nir_shader *s = so->base.ir.nir; 729 730 hash_table_foreach(v3d->prog.cache[s->info.stage], entry) { 731 const struct v3d_key *key = entry->key; 732 struct v3d_compiled_shader *shader = entry->data; 733 734 if (key->shader_state != so) 735 continue; 736 737 if (v3d->prog.fs == shader) 738 v3d->prog.fs = NULL; 739 if (v3d->prog.vs == shader) 740 v3d->prog.vs = NULL; 741 if (v3d->prog.cs == shader) 742 v3d->prog.cs = NULL; 743 if (v3d->prog.compute == shader) 744 v3d->prog.compute = NULL; 745 746 _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry); 747 v3d_free_compiled_shader(shader); 748 } 749 750 ralloc_free(so->base.ir.nir); 751 free(so); 752} 753 754static void 755v3d_fp_state_bind(struct pipe_context *pctx, void *hwcso) 756{ 757 struct v3d_context *v3d = v3d_context(pctx); 758 v3d->prog.bind_fs = hwcso; 759 v3d->dirty |= VC5_DIRTY_UNCOMPILED_FS; 760} 761 762static void 763v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) 764{ 765 struct v3d_context *v3d = v3d_context(pctx); 766 v3d->prog.bind_vs = hwcso; 767 v3d->dirty |= VC5_DIRTY_UNCOMPILED_VS; 768} 769 770static void 771v3d_compute_state_bind(struct pipe_context *pctx, void *state) 772{ 773 struct v3d_context *v3d = v3d_context(pctx); 774 775 v3d->prog.bind_compute = state; 776} 777 778static void * 779v3d_create_compute_state(struct pipe_context *pctx, 780 const struct pipe_compute_state *cso) 781{ 782 return v3d_uncompiled_shader_create(pctx, cso->ir_type, 783 (void *)cso->prog); 784} 785 786void 787v3d_program_init(struct pipe_context *pctx) 788{ 789 struct v3d_context *v3d = v3d_context(pctx); 790 791 pctx->create_vs_state = v3d_shader_state_create; 792 pctx->delete_vs_state = v3d_shader_state_delete; 793 794 pctx->create_fs_state = v3d_shader_state_create; 795 pctx->delete_fs_state = v3d_shader_state_delete; 796 797 pctx->bind_fs_state = v3d_fp_state_bind; 798 pctx->bind_vs_state = v3d_vp_state_bind; 799 800 if (v3d->screen->has_csd) { 801 pctx->create_compute_state = v3d_create_compute_state; 802 pctx->delete_compute_state = v3d_shader_state_delete; 803 pctx->bind_compute_state = v3d_compute_state_bind; 804 } 805 806 v3d->prog.cache[MESA_SHADER_VERTEX] = 807 _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); 808 v3d->prog.cache[MESA_SHADER_FRAGMENT] = 809 _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare); 810 v3d->prog.cache[MESA_SHADER_COMPUTE] = 811 _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare); 812} 813 814void 815v3d_program_fini(struct pipe_context *pctx) 816{ 817 struct v3d_context *v3d = v3d_context(pctx); 818 819 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 820 struct hash_table *cache = v3d->prog.cache[i]; 821 if (!cache) 822 continue; 823 824 hash_table_foreach(cache, entry) { 825 struct v3d_compiled_shader *shader = entry->data; 826 v3d_free_compiled_shader(shader); 827 _mesa_hash_table_remove(cache, entry); 828 } 829 } 830 831 v3d_bo_unreference(&v3d->prog.spill_bo); 832} 833