1/* 2 * Copyright © 2014-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <inttypes.h> 25#include "util/format/u_format.h" 26#include "util/u_math.h" 27#include "util/u_memory.h" 28#include "util/ralloc.h" 29#include "util/hash_table.h" 30#include "util/u_upload_mgr.h" 31#include "tgsi/tgsi_dump.h" 32#include "tgsi/tgsi_parse.h" 33#include "compiler/nir/nir.h" 34#include "compiler/nir/nir_builder.h" 35#include "nir/tgsi_to_nir.h" 36#include "compiler/v3d_compiler.h" 37#include "v3d_context.h" 38#include "broadcom/cle/v3d_packet_v33_pack.h" 39 40static struct v3d_compiled_shader * 41v3d_get_compiled_shader(struct v3d_context *v3d, 42 struct v3d_key *key, size_t key_size); 43static void 44v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 45 struct v3d_key *key); 46 47static gl_varying_slot 48v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) 49{ 50 nir_foreach_shader_out_variable(var, s) { 51 if (var->data.driver_location == driver_location) { 52 return var->data.location; 53 } 54 } 55 56 return -1; 57} 58 59/** 60 * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. 61 * 62 * A shader can have 16 of these specs, and each one of them can write up to 63 * 16 dwords. Since we allow a total of 64 transform feedback output 64 * components (not 16 vectors), we have to group the writes of multiple 65 * varyings together in a single data spec. 66 */ 67static void 68v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, 69 const struct pipe_stream_output_info *stream_output) 70{ 71 if (!stream_output->num_outputs) 72 return; 73 74 struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; 75 int slot_count = 0; 76 77 for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { 78 uint32_t buffer_offset = 0; 79 uint32_t vpm_start = slot_count; 80 81 for (int i = 0; i < stream_output->num_outputs; i++) { 82 const struct pipe_stream_output *output = 83 &stream_output->output[i]; 84 85 if (output->output_buffer != buffer) 86 continue; 87 88 /* We assume that the SO outputs appear in increasing 89 * order in the buffer. 90 */ 91 assert(output->dst_offset >= buffer_offset); 92 93 /* Pad any undefined slots in the output */ 94 for (int j = buffer_offset; j < output->dst_offset; j++) { 95 slots[slot_count] = 96 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); 97 slot_count++; 98 buffer_offset++; 99 } 100 101 /* Set the coordinate shader up to output the 102 * components of this varying. 103 */ 104 for (int j = 0; j < output->num_components; j++) { 105 gl_varying_slot slot = 106 v3d_get_slot_for_driver_location(so->base.ir.nir, output->register_index); 107 108 slots[slot_count] = 109 v3d_slot_from_slot_and_component(slot, 110 output->start_component + j); 111 slot_count++; 112 buffer_offset++; 113 } 114 } 115 116 uint32_t vpm_size = slot_count - vpm_start; 117 if (!vpm_size) 118 continue; 119 120 uint32_t vpm_start_offset = vpm_start + 6; 121 122 while (vpm_size) { 123 uint32_t write_size = MIN2(vpm_size, 1 << 4); 124 125 struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { 126 /* We need the offset from the coordinate shader's VPM 127 * output block, which has the [X, Y, Z, W, Xs, Ys] 128 * values at the start. 129 */ 130 .first_shaded_vertex_value_to_output = vpm_start_offset, 131 .number_of_consecutive_vertex_values_to_output_as_32_bit_values = write_size, 132 .output_buffer_to_write_to = buffer, 133 }; 134 135 /* GFXH-1559 */ 136 assert(unpacked.first_shaded_vertex_value_to_output != 8 || 137 so->num_tf_specs != 0); 138 139 assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); 140 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 141 (void *)&so->tf_specs[so->num_tf_specs], 142 &unpacked); 143 144 /* If point size is being written by the shader, then 145 * all the VPM start offsets are shifted up by one. 146 * We won't know that until the variant is compiled, 147 * though. 148 */ 149 unpacked.first_shaded_vertex_value_to_output++; 150 151 /* GFXH-1559 */ 152 assert(unpacked.first_shaded_vertex_value_to_output != 8 || 153 so->num_tf_specs != 0); 154 155 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 156 (void *)&so->tf_specs_psiz[so->num_tf_specs], 157 &unpacked); 158 so->num_tf_specs++; 159 vpm_start_offset += write_size; 160 vpm_size -= write_size; 161 } 162 so->base.stream_output.stride[buffer] = 163 stream_output->stride[buffer]; 164 } 165 166 so->num_tf_outputs = slot_count; 167 so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, 168 slot_count); 169 memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); 170} 171 172static int 173type_size(const struct glsl_type *type, bool bindless) 174{ 175 return glsl_count_attribute_slots(type, false); 176} 177 178static void 179precompile_all_outputs(nir_shader *s, 180 struct v3d_varying_slot *outputs, 181 uint8_t *num_outputs) 182{ 183 nir_foreach_shader_out_variable(var, s) { 184 const int array_len = MAX2(glsl_get_length(var->type), 1); 185 for (int j = 0; j < array_len; j++) { 186 const int slot = var->data.location + j; 187 const int num_components = 188 glsl_get_components(var->type); 189 for (int i = 0; i < num_components; i++) { 190 const int swiz = var->data.location_frac + i; 191 outputs[(*num_outputs)++] = 192 v3d_slot_from_slot_and_component(slot, 193 swiz); 194 } 195 } 196 } 197} 198 199/** 200 * Precompiles a shader variant at shader state creation time if 201 * V3D_DEBUG=precompile is set. Used for shader-db 202 * (https://gitlab.freedesktop.org/mesa/shader-db) 203 */ 204static void 205v3d_shader_precompile(struct v3d_context *v3d, 206 struct v3d_uncompiled_shader *so) 207{ 208 nir_shader *s = so->base.ir.nir; 209 210 if (s->info.stage == MESA_SHADER_FRAGMENT) { 211 struct v3d_fs_key key = { 212 .base.shader_state = so, 213 }; 214 215 nir_foreach_shader_out_variable(var, s) { 216 if (var->data.location == FRAG_RESULT_COLOR) { 217 key.cbufs |= 1 << 0; 218 } else if (var->data.location >= FRAG_RESULT_DATA0) { 219 key.cbufs |= 1 << (var->data.location - 220 FRAG_RESULT_DATA0); 221 } 222 } 223 224 key.logicop_func = PIPE_LOGICOP_COPY; 225 226 v3d_setup_shared_precompile_key(so, &key.base); 227 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 228 } else if (s->info.stage == MESA_SHADER_GEOMETRY) { 229 struct v3d_gs_key key = { 230 .base.shader_state = so, 231 .base.is_last_geometry_stage = true, 232 }; 233 234 v3d_setup_shared_precompile_key(so, &key.base); 235 236 precompile_all_outputs(s, 237 key.used_outputs, 238 &key.num_used_outputs); 239 240 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 241 242 /* Compile GS bin shader: only position (XXX: include TF) */ 243 key.is_coord = true; 244 key.num_used_outputs = 0; 245 for (int i = 0; i < 4; i++) { 246 key.used_outputs[key.num_used_outputs++] = 247 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 248 i); 249 } 250 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 251 } else { 252 assert(s->info.stage == MESA_SHADER_VERTEX); 253 struct v3d_vs_key key = { 254 .base.shader_state = so, 255 /* Emit fixed function outputs */ 256 .base.is_last_geometry_stage = true, 257 }; 258 259 v3d_setup_shared_precompile_key(so, &key.base); 260 261 precompile_all_outputs(s, 262 key.used_outputs, 263 &key.num_used_outputs); 264 265 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 266 267 /* Compile VS bin shader: only position (XXX: include TF) */ 268 key.is_coord = true; 269 key.num_used_outputs = 0; 270 for (int i = 0; i < 4; i++) { 271 key.used_outputs[key.num_used_outputs++] = 272 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 273 i); 274 } 275 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 276 } 277} 278 279static void * 280v3d_uncompiled_shader_create(struct pipe_context *pctx, 281 enum pipe_shader_ir type, void *ir) 282{ 283 struct v3d_context *v3d = v3d_context(pctx); 284 struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader); 285 if (!so) 286 return NULL; 287 288 so->program_id = v3d->next_uncompiled_program_id++; 289 290 nir_shader *s; 291 292 if (type == PIPE_SHADER_IR_NIR) { 293 /* The backend takes ownership of the NIR shader on state 294 * creation. 295 */ 296 s = ir; 297 } else { 298 assert(type == PIPE_SHADER_IR_TGSI); 299 300 if (unlikely(V3D_DEBUG & V3D_DEBUG_TGSI)) { 301 fprintf(stderr, "prog %d TGSI:\n", 302 so->program_id); 303 tgsi_dump(ir, 0); 304 fprintf(stderr, "\n"); 305 } 306 s = tgsi_to_nir(ir, pctx->screen, false); 307 } 308 309 if (s->info.stage != MESA_SHADER_VERTEX && 310 s->info.stage != MESA_SHADER_GEOMETRY) { 311 NIR_PASS_V(s, nir_lower_io, 312 nir_var_shader_in | nir_var_shader_out, 313 type_size, (nir_lower_io_options)0); 314 } 315 316 NIR_PASS_V(s, nir_lower_regs_to_ssa); 317 NIR_PASS_V(s, nir_normalize_cubemap_coords); 318 319 NIR_PASS_V(s, nir_lower_load_const_to_scalar); 320 321 v3d_optimize_nir(NULL, s); 322 323 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); 324 325 /* Garbage collect dead instructions */ 326 nir_sweep(s); 327 328 so->base.type = PIPE_SHADER_IR_NIR; 329 so->base.ir.nir = s; 330 331 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 332 v3d_debug_flag_for_shader_stage(s->info.stage)))) { 333 fprintf(stderr, "%s prog %d NIR:\n", 334 gl_shader_stage_name(s->info.stage), 335 so->program_id); 336 nir_print_shader(s, stderr); 337 fprintf(stderr, "\n"); 338 } 339 340 if (unlikely(V3D_DEBUG & V3D_DEBUG_PRECOMPILE)) 341 v3d_shader_precompile(v3d, so); 342 343 return so; 344} 345 346static void 347v3d_shader_debug_output(const char *message, void *data) 348{ 349 struct v3d_context *v3d = data; 350 351 pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); 352} 353 354static void * 355v3d_shader_state_create(struct pipe_context *pctx, 356 const struct pipe_shader_state *cso) 357{ 358 struct v3d_uncompiled_shader *so = 359 v3d_uncompiled_shader_create(pctx, 360 cso->type, 361 (cso->type == PIPE_SHADER_IR_TGSI ? 362 (void *)cso->tokens : 363 cso->ir.nir)); 364 365 v3d_set_transform_feedback_outputs(so, &cso->stream_output); 366 367 return so; 368} 369 370struct v3d_compiled_shader * 371v3d_get_compiled_shader(struct v3d_context *v3d, 372 struct v3d_key *key, 373 size_t key_size) 374{ 375 struct v3d_uncompiled_shader *shader_state = key->shader_state; 376 nir_shader *s = shader_state->base.ir.nir; 377 378 struct hash_table *ht = v3d->prog.cache[s->info.stage]; 379 struct hash_entry *entry = _mesa_hash_table_search(ht, key); 380 if (entry) 381 return entry->data; 382 383 struct v3d_compiled_shader *shader = 384 rzalloc(NULL, struct v3d_compiled_shader); 385 386 int program_id = shader_state->program_id; 387 int variant_id = 388 p_atomic_inc_return(&shader_state->compiled_variant_count); 389 uint64_t *qpu_insts; 390 uint32_t shader_size; 391 392 qpu_insts = v3d_compile(v3d->screen->compiler, key, 393 &shader->prog_data.base, s, 394 v3d_shader_debug_output, 395 v3d, 396 program_id, variant_id, &shader_size); 397 ralloc_steal(shader, shader->prog_data.base); 398 399 v3d_set_shader_uniform_dirty_flags(shader); 400 401 if (shader_size) { 402 u_upload_data(v3d->state_uploader, 0, shader_size, 8, 403 qpu_insts, &shader->offset, &shader->resource); 404 } 405 406 free(qpu_insts); 407 408 if (ht) { 409 struct v3d_key *dup_key; 410 dup_key = ralloc_size(shader, key_size); 411 memcpy(dup_key, key, key_size); 412 _mesa_hash_table_insert(ht, dup_key, shader); 413 } 414 415 if (shader->prog_data.base->spill_size > 416 v3d->prog.spill_size_per_thread) { 417 /* The TIDX register we use for choosing the area to access 418 * for scratch space is: (core << 6) | (qpu << 2) | thread. 419 * Even at minimum threadcount in a particular shader, that 420 * means we still multiply by qpus by 4. 421 */ 422 int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * 423 shader->prog_data.base->spill_size); 424 425 v3d_bo_unreference(&v3d->prog.spill_bo); 426 v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, 427 total_spill_size, "spill"); 428 v3d->prog.spill_size_per_thread = 429 shader->prog_data.base->spill_size; 430 } 431 432 return shader; 433} 434 435static void 436v3d_free_compiled_shader(struct v3d_compiled_shader *shader) 437{ 438 pipe_resource_reference(&shader->resource, NULL); 439 ralloc_free(shader); 440} 441 442static void 443v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, 444 struct v3d_texture_stateobj *texstate) 445{ 446 const struct v3d_device_info *devinfo = &v3d->screen->devinfo; 447 448 key->num_tex_used = texstate->num_textures; 449 key->num_samplers_used = texstate->num_textures; 450 assert(key->num_tex_used == key->num_samplers_used); 451 for (int i = 0; i < texstate->num_textures; i++) { 452 struct pipe_sampler_view *sampler = texstate->textures[i]; 453 struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler); 454 struct pipe_sampler_state *sampler_state = 455 texstate->samplers[i]; 456 457 if (!sampler) 458 continue; 459 460 key->sampler[i].return_size = 461 v3d_get_tex_return_size(devinfo, 462 sampler->format, 463 sampler_state->compare_mode); 464 465 /* For 16-bit, we set up the sampler to always return 2 466 * channels (meaning no recompiles for most statechanges), 467 * while for 32 we actually scale the returns with channels. 468 */ 469 if (key->sampler[i].return_size == 16) { 470 key->sampler[i].return_channels = 2; 471 } else if (devinfo->ver > 40) { 472 key->sampler[i].return_channels = 4; 473 } else { 474 key->sampler[i].return_channels = 475 v3d_get_tex_return_channels(devinfo, 476 sampler->format); 477 } 478 479 if (key->sampler[i].return_size == 32 && devinfo->ver < 40) { 480 memcpy(key->tex[i].swizzle, 481 v3d_sampler->swizzle, 482 sizeof(v3d_sampler->swizzle)); 483 } else { 484 /* For 16-bit returns, we let the sampler state handle 485 * the swizzle. 486 */ 487 key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 488 key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 489 key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 490 key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 491 } 492 } 493} 494 495static void 496v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 497 struct v3d_key *key) 498{ 499 nir_shader *s = uncompiled->base.ir.nir; 500 501 /* Note that below we access they key's texture and sampler fields 502 * using the same index. On OpenGL they are the same (they are 503 * combined) 504 */ 505 key->num_tex_used = s->info.num_textures; 506 key->num_samplers_used = s->info.num_textures; 507 for (int i = 0; i < s->info.num_textures; i++) { 508 key->sampler[i].return_size = 16; 509 key->sampler[i].return_channels = 2; 510 511 key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 512 key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 513 key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 514 key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 515 } 516} 517 518static void 519v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) 520{ 521 struct v3d_job *job = v3d->job; 522 struct v3d_fs_key local_key; 523 struct v3d_fs_key *key = &local_key; 524 nir_shader *s = v3d->prog.bind_fs->base.ir.nir; 525 526 if (!(v3d->dirty & (V3D_DIRTY_PRIM_MODE | 527 V3D_DIRTY_BLEND | 528 V3D_DIRTY_FRAMEBUFFER | 529 V3D_DIRTY_ZSA | 530 V3D_DIRTY_RASTERIZER | 531 V3D_DIRTY_SAMPLE_STATE | 532 V3D_DIRTY_FRAGTEX | 533 V3D_DIRTY_UNCOMPILED_FS))) { 534 return; 535 } 536 537 memset(key, 0, sizeof(*key)); 538 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); 539 key->base.shader_state = v3d->prog.bind_fs; 540 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 541 key->is_points = (prim_mode == PIPE_PRIM_POINTS); 542 key->is_lines = (prim_mode >= PIPE_PRIM_LINES && 543 prim_mode <= PIPE_PRIM_LINE_STRIP); 544 key->line_smoothing = (key->is_lines && 545 v3d_line_smoothing_enabled(v3d)); 546 key->has_gs = v3d->prog.bind_gs != NULL; 547 if (v3d->blend->base.logicop_enable) { 548 key->logicop_func = v3d->blend->base.logicop_func; 549 } else { 550 key->logicop_func = PIPE_LOGICOP_COPY; 551 } 552 if (job->msaa) { 553 key->msaa = v3d->rasterizer->base.multisample; 554 key->sample_coverage = (v3d->rasterizer->base.multisample && 555 v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); 556 key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; 557 key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; 558 } 559 560 key->swap_color_rb = v3d->swap_color_rb; 561 562 for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) { 563 struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; 564 if (!cbuf) 565 continue; 566 567 /* gl_FragColor's propagation to however many bound color 568 * buffers there are means that the shader compile needs to 569 * know what buffers are present. 570 */ 571 key->cbufs |= 1 << i; 572 573 /* If logic operations are enabled then we might emit color 574 * reads and we need to know the color buffer format and 575 * swizzle for that. 576 */ 577 if (key->logicop_func != PIPE_LOGICOP_COPY) { 578 key->color_fmt[i].format = cbuf->format; 579 key->color_fmt[i].swizzle = 580 v3d_get_format_swizzle(&v3d->screen->devinfo, 581 cbuf->format); 582 } 583 584 const struct util_format_description *desc = 585 util_format_description(cbuf->format); 586 587 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 588 desc->channel[0].size == 32) { 589 key->f32_color_rb |= 1 << i; 590 } 591 592 if (s->info.fs.untyped_color_outputs) { 593 if (util_format_is_pure_uint(cbuf->format)) 594 key->uint_color_rb |= 1 << i; 595 else if (util_format_is_pure_sint(cbuf->format)) 596 key->int_color_rb |= 1 << i; 597 } 598 } 599 600 if (key->is_points) { 601 key->point_sprite_mask = 602 v3d->rasterizer->base.sprite_coord_enable; 603 /* this is handled by lower_wpos_pntc */ 604 key->point_coord_upper_left = false; 605 } 606 607 struct v3d_compiled_shader *old_fs = v3d->prog.fs; 608 v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 609 if (v3d->prog.fs == old_fs) 610 return; 611 612 v3d->dirty |= V3D_DIRTY_COMPILED_FS; 613 614 if (old_fs) { 615 if (v3d->prog.fs->prog_data.fs->flat_shade_flags != 616 old_fs->prog_data.fs->flat_shade_flags) { 617 v3d->dirty |= V3D_DIRTY_FLAT_SHADE_FLAGS; 618 } 619 620 if (v3d->prog.fs->prog_data.fs->noperspective_flags != 621 old_fs->prog_data.fs->noperspective_flags) { 622 v3d->dirty |= V3D_DIRTY_NOPERSPECTIVE_FLAGS; 623 } 624 625 if (v3d->prog.fs->prog_data.fs->centroid_flags != 626 old_fs->prog_data.fs->centroid_flags) { 627 v3d->dirty |= V3D_DIRTY_CENTROID_FLAGS; 628 } 629 } 630 631 if (old_fs && memcmp(v3d->prog.fs->prog_data.fs->input_slots, 632 old_fs->prog_data.fs->input_slots, 633 sizeof(v3d->prog.fs->prog_data.fs->input_slots))) { 634 v3d->dirty |= V3D_DIRTY_FS_INPUTS; 635 } 636} 637 638static void 639v3d_update_compiled_gs(struct v3d_context *v3d, uint8_t prim_mode) 640{ 641 struct v3d_gs_key local_key; 642 struct v3d_gs_key *key = &local_key; 643 644 if (!(v3d->dirty & (V3D_DIRTY_GEOMTEX | 645 V3D_DIRTY_RASTERIZER | 646 V3D_DIRTY_UNCOMPILED_GS | 647 V3D_DIRTY_PRIM_MODE | 648 V3D_DIRTY_FS_INPUTS))) { 649 return; 650 } 651 652 if (!v3d->prog.bind_gs) { 653 v3d->prog.gs = NULL; 654 v3d->prog.gs_bin = NULL; 655 return; 656 } 657 658 memset(key, 0, sizeof(*key)); 659 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_GEOMETRY]); 660 key->base.shader_state = v3d->prog.bind_gs; 661 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 662 key->base.is_last_geometry_stage = true; 663 key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; 664 STATIC_ASSERT(sizeof(key->used_outputs) == 665 sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 666 memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, 667 sizeof(key->used_outputs)); 668 669 key->per_vertex_point_size = 670 (prim_mode == PIPE_PRIM_POINTS && 671 v3d->rasterizer->base.point_size_per_vertex); 672 673 struct v3d_compiled_shader *gs = 674 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 675 if (gs != v3d->prog.gs) { 676 v3d->prog.gs = gs; 677 v3d->dirty |= V3D_DIRTY_COMPILED_GS; 678 } 679 680 key->is_coord = true; 681 682 /* The last bin-mode shader in the geometry pipeline only outputs 683 * varyings used by transform feedback. 684 */ 685 struct v3d_uncompiled_shader *shader_state = key->base.shader_state; 686 memcpy(key->used_outputs, shader_state->tf_outputs, 687 sizeof(*key->used_outputs) * shader_state->num_tf_outputs); 688 if (shader_state->num_tf_outputs < key->num_used_outputs) { 689 uint32_t size = sizeof(*key->used_outputs) * 690 (key->num_used_outputs - 691 shader_state->num_tf_outputs); 692 memset(&key->used_outputs[shader_state->num_tf_outputs], 693 0, size); 694 } 695 key->num_used_outputs = shader_state->num_tf_outputs; 696 697 struct v3d_compiled_shader *old_gs = v3d->prog.gs; 698 struct v3d_compiled_shader *gs_bin = 699 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 700 if (gs_bin != old_gs) { 701 v3d->prog.gs_bin = gs_bin; 702 v3d->dirty |= V3D_DIRTY_COMPILED_GS_BIN; 703 } 704 705 if (old_gs && memcmp(v3d->prog.gs->prog_data.gs->input_slots, 706 old_gs->prog_data.gs->input_slots, 707 sizeof(v3d->prog.gs->prog_data.gs->input_slots))) { 708 v3d->dirty |= V3D_DIRTY_GS_INPUTS; 709 } 710} 711 712static void 713v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) 714{ 715 struct v3d_vs_key local_key; 716 struct v3d_vs_key *key = &local_key; 717 718 if (!(v3d->dirty & (V3D_DIRTY_VERTTEX | 719 V3D_DIRTY_VTXSTATE | 720 V3D_DIRTY_UNCOMPILED_VS | 721 (v3d->prog.bind_gs ? 0 : V3D_DIRTY_RASTERIZER) | 722 (v3d->prog.bind_gs ? 0 : V3D_DIRTY_PRIM_MODE) | 723 (v3d->prog.bind_gs ? V3D_DIRTY_GS_INPUTS : 724 V3D_DIRTY_FS_INPUTS)))) { 725 return; 726 } 727 728 memset(key, 0, sizeof(*key)); 729 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); 730 key->base.shader_state = v3d->prog.bind_vs; 731 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 732 key->base.is_last_geometry_stage = !v3d->prog.bind_gs; 733 734 if (!v3d->prog.bind_gs) { 735 key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; 736 STATIC_ASSERT(sizeof(key->used_outputs) == 737 sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 738 memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, 739 sizeof(key->used_outputs)); 740 } else { 741 key->num_used_outputs = v3d->prog.gs->prog_data.gs->num_inputs; 742 STATIC_ASSERT(sizeof(key->used_outputs) == 743 sizeof(v3d->prog.gs->prog_data.gs->input_slots)); 744 memcpy(key->used_outputs, v3d->prog.gs->prog_data.gs->input_slots, 745 sizeof(key->used_outputs)); 746 } 747 748 key->per_vertex_point_size = 749 (prim_mode == PIPE_PRIM_POINTS && 750 v3d->rasterizer->base.point_size_per_vertex); 751 752 nir_shader *s = v3d->prog.bind_vs->base.ir.nir; 753 uint64_t inputs_read = s->info.inputs_read; 754 assert(util_bitcount(inputs_read) <= v3d->vtx->num_elements); 755 756 while (inputs_read) { 757 int location = u_bit_scan64(&inputs_read); 758 nir_variable *var = 759 nir_find_variable_with_location(s, nir_var_shader_in, location); 760 assert (var != NULL); 761 int driver_location = var->data.driver_location; 762 switch (v3d->vtx->pipe[driver_location].src_format) { 763 case PIPE_FORMAT_B8G8R8A8_UNORM: 764 case PIPE_FORMAT_B10G10R10A2_UNORM: 765 case PIPE_FORMAT_B10G10R10A2_SNORM: 766 case PIPE_FORMAT_B10G10R10A2_USCALED: 767 case PIPE_FORMAT_B10G10R10A2_SSCALED: 768 key->va_swap_rb_mask |= 1 << location; 769 break; 770 default: 771 break; 772 } 773 } 774 775 struct v3d_compiled_shader *vs = 776 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 777 if (vs != v3d->prog.vs) { 778 v3d->prog.vs = vs; 779 v3d->dirty |= V3D_DIRTY_COMPILED_VS; 780 } 781 782 key->is_coord = true; 783 784 /* Coord shaders only output varyings used by transform feedback, 785 * unless they are linked to other shaders in the geometry side 786 * of the pipeline, since in that case any of the output varyings 787 * could be required in later geometry stages to compute 788 * gl_Position or TF outputs. 789 */ 790 if (!v3d->prog.bind_gs) { 791 struct v3d_uncompiled_shader *shader_state = 792 key->base.shader_state; 793 memcpy(key->used_outputs, shader_state->tf_outputs, 794 sizeof(*key->used_outputs) * 795 shader_state->num_tf_outputs); 796 if (shader_state->num_tf_outputs < key->num_used_outputs) { 797 uint32_t tail_bytes = 798 sizeof(*key->used_outputs) * 799 (key->num_used_outputs - 800 shader_state->num_tf_outputs); 801 memset(&key->used_outputs[shader_state->num_tf_outputs], 802 0, tail_bytes); 803 } 804 key->num_used_outputs = shader_state->num_tf_outputs; 805 } else { 806 key->num_used_outputs = v3d->prog.gs_bin->prog_data.gs->num_inputs; 807 STATIC_ASSERT(sizeof(key->used_outputs) == 808 sizeof(v3d->prog.gs_bin->prog_data.gs->input_slots)); 809 memcpy(key->used_outputs, v3d->prog.gs_bin->prog_data.gs->input_slots, 810 sizeof(key->used_outputs)); 811 } 812 813 struct v3d_compiled_shader *cs = 814 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 815 if (cs != v3d->prog.cs) { 816 v3d->prog.cs = cs; 817 v3d->dirty |= V3D_DIRTY_COMPILED_CS; 818 } 819} 820 821void 822v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode) 823{ 824 v3d_update_compiled_fs(v3d, prim_mode); 825 v3d_update_compiled_gs(v3d, prim_mode); 826 v3d_update_compiled_vs(v3d, prim_mode); 827} 828 829void 830v3d_update_compiled_cs(struct v3d_context *v3d) 831{ 832 struct v3d_key local_key; 833 struct v3d_key *key = &local_key; 834 835 if (!(v3d->dirty & (V3D_DIRTY_UNCOMPILED_CS | 836 V3D_DIRTY_COMPTEX))) { 837 return; 838 } 839 840 memset(key, 0, sizeof(*key)); 841 v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]); 842 key->shader_state = v3d->prog.bind_compute; 843 844 struct v3d_compiled_shader *cs = 845 v3d_get_compiled_shader(v3d, key, sizeof(*key)); 846 if (cs != v3d->prog.compute) { 847 v3d->prog.compute = cs; 848 v3d->dirty |= V3D_DIRTY_COMPILED_CS; /* XXX */ 849 } 850} 851 852static uint32_t 853fs_cache_hash(const void *key) 854{ 855 return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); 856} 857 858static uint32_t 859gs_cache_hash(const void *key) 860{ 861 return _mesa_hash_data(key, sizeof(struct v3d_gs_key)); 862} 863 864static uint32_t 865vs_cache_hash(const void *key) 866{ 867 return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); 868} 869 870static uint32_t 871cs_cache_hash(const void *key) 872{ 873 return _mesa_hash_data(key, sizeof(struct v3d_key)); 874} 875 876static bool 877fs_cache_compare(const void *key1, const void *key2) 878{ 879 return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; 880} 881 882static bool 883gs_cache_compare(const void *key1, const void *key2) 884{ 885 return memcmp(key1, key2, sizeof(struct v3d_gs_key)) == 0; 886} 887 888static bool 889vs_cache_compare(const void *key1, const void *key2) 890{ 891 return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; 892} 893 894static bool 895cs_cache_compare(const void *key1, const void *key2) 896{ 897 return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; 898} 899 900static void 901v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso) 902{ 903 struct v3d_context *v3d = v3d_context(pctx); 904 struct v3d_uncompiled_shader *so = hwcso; 905 nir_shader *s = so->base.ir.nir; 906 907 hash_table_foreach(v3d->prog.cache[s->info.stage], entry) { 908 const struct v3d_key *key = entry->key; 909 struct v3d_compiled_shader *shader = entry->data; 910 911 if (key->shader_state != so) 912 continue; 913 914 if (v3d->prog.fs == shader) 915 v3d->prog.fs = NULL; 916 if (v3d->prog.vs == shader) 917 v3d->prog.vs = NULL; 918 if (v3d->prog.cs == shader) 919 v3d->prog.cs = NULL; 920 if (v3d->prog.compute == shader) 921 v3d->prog.compute = NULL; 922 923 _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry); 924 v3d_free_compiled_shader(shader); 925 } 926 927 ralloc_free(so->base.ir.nir); 928 free(so); 929} 930 931static void 932v3d_fp_state_bind(struct pipe_context *pctx, void *hwcso) 933{ 934 struct v3d_context *v3d = v3d_context(pctx); 935 v3d->prog.bind_fs = hwcso; 936 v3d->dirty |= V3D_DIRTY_UNCOMPILED_FS; 937} 938 939static void 940v3d_gp_state_bind(struct pipe_context *pctx, void *hwcso) 941{ 942 struct v3d_context *v3d = v3d_context(pctx); 943 v3d->prog.bind_gs = hwcso; 944 v3d->dirty |= V3D_DIRTY_UNCOMPILED_GS; 945} 946 947static void 948v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) 949{ 950 struct v3d_context *v3d = v3d_context(pctx); 951 v3d->prog.bind_vs = hwcso; 952 v3d->dirty |= V3D_DIRTY_UNCOMPILED_VS; 953} 954 955static void 956v3d_compute_state_bind(struct pipe_context *pctx, void *state) 957{ 958 struct v3d_context *v3d = v3d_context(pctx); 959 960 v3d->prog.bind_compute = state; 961 v3d->dirty |= V3D_DIRTY_UNCOMPILED_CS; 962} 963 964static void * 965v3d_create_compute_state(struct pipe_context *pctx, 966 const struct pipe_compute_state *cso) 967{ 968 return v3d_uncompiled_shader_create(pctx, cso->ir_type, 969 (void *)cso->prog); 970} 971 972void 973v3d_program_init(struct pipe_context *pctx) 974{ 975 struct v3d_context *v3d = v3d_context(pctx); 976 977 pctx->create_vs_state = v3d_shader_state_create; 978 pctx->delete_vs_state = v3d_shader_state_delete; 979 980 pctx->create_gs_state = v3d_shader_state_create; 981 pctx->delete_gs_state = v3d_shader_state_delete; 982 983 pctx->create_fs_state = v3d_shader_state_create; 984 pctx->delete_fs_state = v3d_shader_state_delete; 985 986 pctx->bind_fs_state = v3d_fp_state_bind; 987 pctx->bind_gs_state = v3d_gp_state_bind; 988 pctx->bind_vs_state = v3d_vp_state_bind; 989 990 if (v3d->screen->has_csd) { 991 pctx->create_compute_state = v3d_create_compute_state; 992 pctx->delete_compute_state = v3d_shader_state_delete; 993 pctx->bind_compute_state = v3d_compute_state_bind; 994 } 995 996 v3d->prog.cache[MESA_SHADER_VERTEX] = 997 _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); 998 v3d->prog.cache[MESA_SHADER_GEOMETRY] = 999 _mesa_hash_table_create(pctx, gs_cache_hash, gs_cache_compare); 1000 v3d->prog.cache[MESA_SHADER_FRAGMENT] = 1001 _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare); 1002 v3d->prog.cache[MESA_SHADER_COMPUTE] = 1003 _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare); 1004} 1005 1006void 1007v3d_program_fini(struct pipe_context *pctx) 1008{ 1009 struct v3d_context *v3d = v3d_context(pctx); 1010 1011 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 1012 struct hash_table *cache = v3d->prog.cache[i]; 1013 if (!cache) 1014 continue; 1015 1016 hash_table_foreach(cache, entry) { 1017 struct v3d_compiled_shader *shader = entry->data; 1018 v3d_free_compiled_shader(shader); 1019 _mesa_hash_table_remove(cache, entry); 1020 } 1021 } 1022 1023 v3d_bo_unreference(&v3d->prog.spill_bo); 1024} 1025