1/**************************************************************************** 2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ***************************************************************************/ 23 24#include <llvm/Config/llvm-config.h> 25 26#if LLVM_VERSION_MAJOR < 7 27// llvm redefines DEBUG 28#pragma push_macro("DEBUG") 29#undef DEBUG 30#endif 31 32#include <rasterizer/core/state.h> 33#include "JitManager.h" 34 35#if LLVM_VERSION_MAJOR < 7 36#pragma pop_macro("DEBUG") 37#endif 38 39#include "common/os.h" 40#include "jit_api.h" 41#include "gen_state_llvm.h" 42#include "core/multisample.h" 43#include "core/state_funcs.h" 44 45#include "gallivm/lp_bld_tgsi.h" 46#include "util/format/u_format.h" 47 48#include "util/u_memory.h" 49#include "util/u_inlines.h" 50#include "util/u_helpers.h" 51#include "util/u_framebuffer.h" 52#include "util/u_viewport.h" 53#include "util/u_prim.h" 54 55#include "swr_state.h" 56#include "swr_context.h" 57#include "gen_surf_state_llvm.h" 58#include "gen_swr_context_llvm.h" 59#include "swr_screen.h" 60#include "swr_resource.h" 61#include "swr_tex_sample.h" 62#include "swr_scratch.h" 63#include "swr_shader.h" 64#include "swr_fence.h" 65 66/* These should be pulled out into separate files as necessary 67 * Just initializing everything here to get going. */ 68 69static void * 70swr_create_blend_state(struct pipe_context *pipe, 71 const struct pipe_blend_state *blend) 72{ 73 struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state); 74 assert(state != nullptr); 75 76 memcpy(&state->pipe, blend, sizeof(*blend)); 77 78 struct pipe_blend_state *pipe_blend = &state->pipe; 79 80 for (int target = 0; 81 target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS); 82 target++) { 83 84 struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target]; 85 SWR_RENDER_TARGET_BLEND_STATE &blendState = 86 state->blendState.renderTarget[target]; 87 RENDER_TARGET_BLEND_COMPILE_STATE &compileState = 88 state->compileState[target]; 89 90 if (target != 0 && !pipe_blend->independent_blend_enable) { 91 memcpy(&compileState, 92 &state->compileState[0], 93 sizeof(RENDER_TARGET_BLEND_COMPILE_STATE)); 94 continue; 95 } 96 97 compileState.blendEnable = rt_blend->blend_enable; 98 if (compileState.blendEnable) { 99 compileState.sourceAlphaBlendFactor = 100 swr_convert_blend_factor(rt_blend->alpha_src_factor); 101 compileState.destAlphaBlendFactor = 102 swr_convert_blend_factor(rt_blend->alpha_dst_factor); 103 compileState.sourceBlendFactor = 104 swr_convert_blend_factor(rt_blend->rgb_src_factor); 105 compileState.destBlendFactor = 106 swr_convert_blend_factor(rt_blend->rgb_dst_factor); 107 108 compileState.colorBlendFunc = 109 swr_convert_blend_func(rt_blend->rgb_func); 110 compileState.alphaBlendFunc = 111 swr_convert_blend_func(rt_blend->alpha_func); 112 } 113 compileState.logicOpEnable = state->pipe.logicop_enable; 114 if (compileState.logicOpEnable) { 115 compileState.logicOpFunc = 116 swr_convert_logic_op(state->pipe.logicop_func); 117 } 118 119 blendState.writeDisableRed = 120 (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1; 121 blendState.writeDisableGreen = 122 (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1; 123 blendState.writeDisableBlue = 124 (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1; 125 blendState.writeDisableAlpha = 126 (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1; 127 128 if (rt_blend->colormask == 0) 129 compileState.blendEnable = false; 130 } 131 132 return state; 133} 134 135static void 136swr_bind_blend_state(struct pipe_context *pipe, void *blend) 137{ 138 struct swr_context *ctx = swr_context(pipe); 139 140 if (ctx->blend == blend) 141 return; 142 143 ctx->blend = (swr_blend_state *)blend; 144 145 ctx->dirty |= SWR_NEW_BLEND; 146} 147 148static void 149swr_delete_blend_state(struct pipe_context *pipe, void *blend) 150{ 151 FREE(blend); 152} 153 154static void 155swr_set_blend_color(struct pipe_context *pipe, 156 const struct pipe_blend_color *color) 157{ 158 struct swr_context *ctx = swr_context(pipe); 159 160 ctx->blend_color = *color; 161 162 ctx->dirty |= SWR_NEW_BLEND; 163} 164 165static void 166swr_set_stencil_ref(struct pipe_context *pipe, 167 const struct pipe_stencil_ref ref) 168{ 169 struct swr_context *ctx = swr_context(pipe); 170 171 ctx->stencil_ref = ref; 172 173 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA; 174} 175 176static void * 177swr_create_depth_stencil_state( 178 struct pipe_context *pipe, 179 const struct pipe_depth_stencil_alpha_state *depth_stencil) 180{ 181 struct pipe_depth_stencil_alpha_state *state; 182 183 state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil, 184 sizeof *depth_stencil); 185 186 return state; 187} 188 189static void 190swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) 191{ 192 struct swr_context *ctx = swr_context(pipe); 193 194 if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil) 195 return; 196 197 ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil; 198 199 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA; 200} 201 202static void 203swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) 204{ 205 FREE(depth); 206} 207 208 209static void * 210swr_create_rasterizer_state(struct pipe_context *pipe, 211 const struct pipe_rasterizer_state *rast) 212{ 213 struct pipe_rasterizer_state *state; 214 state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast); 215 216 return state; 217} 218 219static void 220swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle) 221{ 222 struct swr_context *ctx = swr_context(pipe); 223 const struct pipe_rasterizer_state *rasterizer = 224 (const struct pipe_rasterizer_state *)handle; 225 226 if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer) 227 return; 228 229 ctx->rasterizer = (pipe_rasterizer_state *)rasterizer; 230 231 ctx->dirty |= SWR_NEW_RASTERIZER; 232} 233 234static void 235swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) 236{ 237 FREE(rasterizer); 238} 239 240 241static void * 242swr_create_sampler_state(struct pipe_context *pipe, 243 const struct pipe_sampler_state *sampler) 244{ 245 struct pipe_sampler_state *state = 246 (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler); 247 248 return state; 249} 250 251static void 252swr_bind_sampler_states(struct pipe_context *pipe, 253 enum pipe_shader_type shader, 254 unsigned start, 255 unsigned num, 256 void **samplers) 257{ 258 struct swr_context *ctx = swr_context(pipe); 259 unsigned i; 260 261 assert(shader < PIPE_SHADER_TYPES); 262 assert(start + num <= ARRAY_SIZE(ctx->samplers[shader])); 263 264 /* set the new samplers */ 265 ctx->num_samplers[shader] = num; 266 for (i = 0; i < num; i++) { 267 ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i]; 268 } 269 270 ctx->dirty |= SWR_NEW_SAMPLER; 271} 272 273static void 274swr_delete_sampler_state(struct pipe_context *pipe, void *sampler) 275{ 276 FREE(sampler); 277} 278 279 280static struct pipe_sampler_view * 281swr_create_sampler_view(struct pipe_context *pipe, 282 struct pipe_resource *texture, 283 const struct pipe_sampler_view *templ) 284{ 285 struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); 286 287 if (view) { 288 *view = *templ; 289 view->reference.count = 1; 290 view->texture = NULL; 291 pipe_resource_reference(&view->texture, texture); 292 view->context = pipe; 293 } 294 295 return view; 296} 297 298static void 299swr_set_sampler_views(struct pipe_context *pipe, 300 enum pipe_shader_type shader, 301 unsigned start, 302 unsigned num, 303 unsigned unbind_num_trailing_slots, 304 bool take_ownership, 305 struct pipe_sampler_view **views) 306{ 307 struct swr_context *ctx = swr_context(pipe); 308 uint i; 309 310 assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); 311 312 assert(shader < PIPE_SHADER_TYPES); 313 assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader])); 314 315 /* set the new sampler views */ 316 ctx->num_sampler_views[shader] = num; 317 for (i = 0; i < num; i++) { 318 if (take_ownership) { 319 pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i], 320 NULL); 321 ctx->sampler_views[shader][start + i] = views[i]; 322 } else { 323 pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i], 324 views[i]); 325 } 326 } 327 for (; i < num + unbind_num_trailing_slots; i++) { 328 pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i], 329 NULL); 330 } 331 332 ctx->dirty |= SWR_NEW_SAMPLER_VIEW; 333} 334 335static void 336swr_sampler_view_destroy(struct pipe_context *pipe, 337 struct pipe_sampler_view *view) 338{ 339 pipe_resource_reference(&view->texture, NULL); 340 FREE(view); 341} 342 343static void * 344swr_create_vs_state(struct pipe_context *pipe, 345 const struct pipe_shader_state *vs) 346{ 347 struct swr_vertex_shader *swr_vs = new swr_vertex_shader; 348 if (!swr_vs) 349 return NULL; 350 351 swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens); 352 swr_vs->pipe.stream_output = vs->stream_output; 353 354 lp_build_tgsi_info(vs->tokens, &swr_vs->info); 355 356 swr_vs->soState = {0}; 357 358 if (swr_vs->pipe.stream_output.num_outputs) { 359 pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output; 360 361 swr_vs->soState.soEnable = true; 362 // soState.rasterizerDisable set on state dirty 363 // soState.streamToRasterizer not used 364 365 for (uint32_t i = 0; i < stream_output->num_outputs; i++) { 366 unsigned attrib_slot = stream_output->output[i].register_index; 367 attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs); 368 swr_vs->soState.streamMasks[stream_output->output[i].stream] |= 369 (1 << attrib_slot); 370 } 371 for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) { 372 swr_vs->soState.streamNumEntries[i] = 373 _mm_popcnt_u32(swr_vs->soState.streamMasks[i]); 374 } 375 } 376 377 return swr_vs; 378} 379 380static void 381swr_bind_vs_state(struct pipe_context *pipe, void *vs) 382{ 383 struct swr_context *ctx = swr_context(pipe); 384 385 if (ctx->vs == vs) 386 return; 387 388 ctx->vs = (swr_vertex_shader *)vs; 389 ctx->dirty |= SWR_NEW_VS; 390} 391 392static void 393swr_delete_vs_state(struct pipe_context *pipe, void *vs) 394{ 395 struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs; 396 FREE((void *)swr_vs->pipe.tokens); 397 struct swr_screen *screen = swr_screen(pipe->screen); 398 399 /* Defer deletion of vs state */ 400 swr_fence_work_delete_vs(screen->flush_fence, swr_vs); 401} 402 403static void * 404swr_create_fs_state(struct pipe_context *pipe, 405 const struct pipe_shader_state *fs) 406{ 407 struct swr_fragment_shader *swr_fs = new swr_fragment_shader; 408 if (!swr_fs) 409 return NULL; 410 411 swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens); 412 413 lp_build_tgsi_info(fs->tokens, &swr_fs->info); 414 415 return swr_fs; 416} 417 418 419static void 420swr_bind_fs_state(struct pipe_context *pipe, void *fs) 421{ 422 struct swr_context *ctx = swr_context(pipe); 423 424 if (ctx->fs == fs) 425 return; 426 427 ctx->fs = (swr_fragment_shader *)fs; 428 ctx->dirty |= SWR_NEW_FS; 429} 430 431static void 432swr_delete_fs_state(struct pipe_context *pipe, void *fs) 433{ 434 struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs; 435 FREE((void *)swr_fs->pipe.tokens); 436 struct swr_screen *screen = swr_screen(pipe->screen); 437 438 /* Defer deleton of fs state */ 439 swr_fence_work_delete_fs(screen->flush_fence, swr_fs); 440} 441 442static void * 443swr_create_gs_state(struct pipe_context *pipe, 444 const struct pipe_shader_state *gs) 445{ 446 struct swr_geometry_shader *swr_gs = new swr_geometry_shader; 447 if (!swr_gs) 448 return NULL; 449 450 swr_gs->pipe.tokens = tgsi_dup_tokens(gs->tokens); 451 lp_build_tgsi_info(gs->tokens, &swr_gs->info); 452 return swr_gs; 453} 454 455static void 456swr_bind_gs_state(struct pipe_context *pipe, void *gs) 457{ 458 struct swr_context *ctx = swr_context(pipe); 459 460 if (ctx->gs == gs) 461 return; 462 463 ctx->gs = (swr_geometry_shader *)gs; 464 ctx->dirty |= SWR_NEW_GS; 465} 466 467static void 468swr_delete_gs_state(struct pipe_context *pipe, void *gs) 469{ 470 struct swr_geometry_shader *swr_gs = (swr_geometry_shader *)gs; 471 FREE((void *)swr_gs->pipe.tokens); 472 struct swr_screen *screen = swr_screen(pipe->screen); 473 474 /* Defer deleton of fs state */ 475 swr_fence_work_delete_gs(screen->flush_fence, swr_gs); 476} 477 478static void * 479swr_create_tcs_state(struct pipe_context *pipe, 480 const struct pipe_shader_state *tcs) 481{ 482 struct swr_tess_control_shader *swr_tcs = new swr_tess_control_shader; 483 if (!swr_tcs) 484 return NULL; 485 486 swr_tcs->pipe.tokens = tgsi_dup_tokens(tcs->tokens); 487 lp_build_tgsi_info(tcs->tokens, &swr_tcs->info); 488 return swr_tcs; 489} 490 491static void 492swr_bind_tcs_state(struct pipe_context *pipe, void *tcs) 493{ 494 struct swr_context *ctx = swr_context(pipe); 495 496 if (ctx->tcs == tcs) 497 return; 498 499 ctx->tcs = (swr_tess_control_shader *)tcs; 500 ctx->dirty |= SWR_NEW_TCS; 501 ctx->dirty |= SWR_NEW_TS; 502} 503 504static void 505swr_delete_tcs_state(struct pipe_context *pipe, void *tcs) 506{ 507 struct swr_tess_control_shader *swr_tcs = (swr_tess_control_shader *)tcs; 508 FREE((void *)swr_tcs->pipe.tokens); 509 struct swr_screen *screen = swr_screen(pipe->screen); 510 511 /* Defer deleton of tcs state */ 512 swr_fence_work_delete_tcs(screen->flush_fence, swr_tcs); 513} 514 515static void * 516swr_create_tes_state(struct pipe_context *pipe, 517 const struct pipe_shader_state *tes) 518{ 519 struct swr_tess_evaluation_shader *swr_tes = new swr_tess_evaluation_shader; 520 if (!swr_tes) 521 return NULL; 522 523 swr_tes->pipe.tokens = tgsi_dup_tokens(tes->tokens); 524 lp_build_tgsi_info(tes->tokens, &swr_tes->info); 525 return swr_tes; 526} 527 528static void 529swr_bind_tes_state(struct pipe_context *pipe, void *tes) 530{ 531 struct swr_context *ctx = swr_context(pipe); 532 533 if (ctx->tes == tes) 534 return; 535 536 // Save current tessellator state first 537 if (ctx->tes != nullptr) { 538 ctx->tes->ts_state = ctx->tsState; 539 } 540 541 ctx->tes = (swr_tess_evaluation_shader *)tes; 542 543 ctx->dirty |= SWR_NEW_TES; 544 ctx->dirty |= SWR_NEW_TS; 545} 546 547static void 548swr_delete_tes_state(struct pipe_context *pipe, void *tes) 549{ 550 struct swr_tess_evaluation_shader *swr_tes = (swr_tess_evaluation_shader *)tes; 551 FREE((void *)swr_tes->pipe.tokens); 552 struct swr_screen *screen = swr_screen(pipe->screen); 553 554 /* Defer deleton of tes state */ 555 swr_fence_work_delete_tes(screen->flush_fence, swr_tes); 556} 557 558static void 559swr_set_constant_buffer(struct pipe_context *pipe, 560 enum pipe_shader_type shader, 561 uint index, bool take_ownership, 562 const struct pipe_constant_buffer *cb) 563{ 564 struct swr_context *ctx = swr_context(pipe); 565 struct pipe_resource *constants = cb ? cb->buffer : NULL; 566 567 assert(shader < PIPE_SHADER_TYPES); 568 assert(index < ARRAY_SIZE(ctx->constants[shader])); 569 570 /* note: reference counting */ 571 util_copy_constant_buffer(&ctx->constants[shader][index], cb, take_ownership); 572 573 if (shader == PIPE_SHADER_VERTEX) { 574 ctx->dirty |= SWR_NEW_VSCONSTANTS; 575 } else if (shader == PIPE_SHADER_FRAGMENT) { 576 ctx->dirty |= SWR_NEW_FSCONSTANTS; 577 } else if (shader == PIPE_SHADER_GEOMETRY) { 578 ctx->dirty |= SWR_NEW_GSCONSTANTS; 579 } else if (shader == PIPE_SHADER_TESS_CTRL) { 580 ctx->dirty |= SWR_NEW_TCSCONSTANTS; 581 } else if (shader == PIPE_SHADER_TESS_EVAL) { 582 ctx->dirty |= SWR_NEW_TESCONSTANTS; 583 } 584 if (cb && cb->user_buffer) { 585 pipe_resource_reference(&constants, NULL); 586 } 587} 588 589 590static void * 591swr_create_vertex_elements_state(struct pipe_context *pipe, 592 unsigned num_elements, 593 const struct pipe_vertex_element *attribs) 594{ 595 struct swr_vertex_element_state *velems; 596 assert(num_elements <= PIPE_MAX_ATTRIBS); 597 velems = new swr_vertex_element_state; 598 if (velems) { 599 memset((void*)&velems->fsState, 0, sizeof(velems->fsState)); 600 velems->fsState.bVertexIDOffsetEnable = true; 601 velems->fsState.numAttribs = num_elements; 602 for (unsigned i = 0; i < num_elements; i++) { 603 // XXX: we should do this keyed on the VS usage info 604 605 const struct util_format_description *desc = 606 util_format_description((enum pipe_format)attribs[i].src_format); 607 608 velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset; 609 velems->fsState.layout[i].Format = 610 mesa_to_swr_format((enum pipe_format)attribs[i].src_format); 611 velems->fsState.layout[i].StreamIndex = 612 attribs[i].vertex_buffer_index; 613 velems->fsState.layout[i].InstanceEnable = 614 attribs[i].instance_divisor != 0; 615 velems->fsState.layout[i].ComponentControl0 = 616 desc->channel[0].type != UTIL_FORMAT_TYPE_VOID 617 ? ComponentControl::StoreSrc 618 : ComponentControl::Store0; 619 velems->fsState.layout[i].ComponentControl1 = 620 desc->channel[1].type != UTIL_FORMAT_TYPE_VOID 621 ? ComponentControl::StoreSrc 622 : ComponentControl::Store0; 623 velems->fsState.layout[i].ComponentControl2 = 624 desc->channel[2].type != UTIL_FORMAT_TYPE_VOID 625 ? ComponentControl::StoreSrc 626 : ComponentControl::Store0; 627 velems->fsState.layout[i].ComponentControl3 = 628 desc->channel[3].type != UTIL_FORMAT_TYPE_VOID 629 ? ComponentControl::StoreSrc 630 : ComponentControl::Store1Fp; 631 velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW; 632 velems->fsState.layout[i].InstanceAdvancementState = 633 attribs[i].instance_divisor; 634 635 /* Calculate the pitch of each stream */ 636 const SWR_FORMAT_INFO &swr_desc = GetFormatInfo( 637 mesa_to_swr_format((enum pipe_format)attribs[i].src_format)); 638 velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp; 639 640 if (attribs[i].instance_divisor != 0) { 641 velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index; 642 uint32_t *min_instance_div = 643 &velems->min_instance_div[attribs[i].vertex_buffer_index]; 644 if (!*min_instance_div || 645 attribs[i].instance_divisor < *min_instance_div) 646 *min_instance_div = attribs[i].instance_divisor; 647 } 648 } 649 } 650 651 return velems; 652} 653 654static void 655swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) 656{ 657 struct swr_context *ctx = swr_context(pipe); 658 struct swr_vertex_element_state *swr_velems = 659 (struct swr_vertex_element_state *)velems; 660 661 ctx->velems = swr_velems; 662 ctx->dirty |= SWR_NEW_VERTEX; 663} 664 665static void 666swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) 667{ 668 struct swr_vertex_element_state *swr_velems = 669 (struct swr_vertex_element_state *) velems; 670 /* XXX Need to destroy fetch shader? */ 671 delete swr_velems; 672} 673 674 675static void 676swr_set_vertex_buffers(struct pipe_context *pipe, 677 unsigned start_slot, 678 unsigned num_elements, 679 unsigned unbind_num_trailing_slots, 680 bool take_ownership, 681 const struct pipe_vertex_buffer *buffers) 682{ 683 struct swr_context *ctx = swr_context(pipe); 684 685 assert(num_elements <= PIPE_MAX_ATTRIBS); 686 687 util_set_vertex_buffers_count(ctx->vertex_buffer, 688 &ctx->num_vertex_buffers, 689 buffers, 690 start_slot, 691 num_elements, 692 unbind_num_trailing_slots, 693 take_ownership); 694 695 ctx->dirty |= SWR_NEW_VERTEX; 696} 697 698 699static void 700swr_set_polygon_stipple(struct pipe_context *pipe, 701 const struct pipe_poly_stipple *stipple) 702{ 703 struct swr_context *ctx = swr_context(pipe); 704 705 ctx->poly_stipple.pipe = *stipple; /* struct copy */ 706 ctx->dirty |= SWR_NEW_STIPPLE; 707} 708 709static void 710swr_set_clip_state(struct pipe_context *pipe, 711 const struct pipe_clip_state *clip) 712{ 713 struct swr_context *ctx = swr_context(pipe); 714 715 ctx->clip = *clip; 716 /* XXX Unimplemented, but prevents crash */ 717 718 ctx->dirty |= SWR_NEW_CLIP; 719} 720 721 722static void 723swr_set_scissor_states(struct pipe_context *pipe, 724 unsigned start_slot, 725 unsigned num_scissors, 726 const struct pipe_scissor_state *scissors) 727{ 728 struct swr_context *ctx = swr_context(pipe); 729 730 memcpy(ctx->scissors + start_slot, scissors, 731 sizeof(struct pipe_scissor_state) * num_scissors); 732 733 for (unsigned i = 0; i < num_scissors; i++) { 734 auto idx = start_slot + i; 735 ctx->swr_scissors[idx].xmin = scissors[idx].minx; 736 ctx->swr_scissors[idx].xmax = scissors[idx].maxx; 737 ctx->swr_scissors[idx].ymin = scissors[idx].miny; 738 ctx->swr_scissors[idx].ymax = scissors[idx].maxy; 739 } 740 ctx->dirty |= SWR_NEW_SCISSOR; 741} 742 743static void 744swr_set_viewport_states(struct pipe_context *pipe, 745 unsigned start_slot, 746 unsigned num_viewports, 747 const struct pipe_viewport_state *vpt) 748{ 749 struct swr_context *ctx = swr_context(pipe); 750 751 memcpy(ctx->viewports + start_slot, vpt, sizeof(struct pipe_viewport_state) * num_viewports); 752 ctx->dirty |= SWR_NEW_VIEWPORT; 753} 754 755 756static void 757swr_set_framebuffer_state(struct pipe_context *pipe, 758 const struct pipe_framebuffer_state *fb) 759{ 760 struct swr_context *ctx = swr_context(pipe); 761 762 bool changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb); 763 764 assert(fb->width <= KNOB_GUARDBAND_WIDTH); 765 assert(fb->height <= KNOB_GUARDBAND_HEIGHT); 766 767 if (changed) { 768 util_copy_framebuffer_state(&ctx->framebuffer, fb); 769 770 /* 0 and 1 both indicate no msaa. Core doesn't understand 0 samples */ 771 ctx->framebuffer.samples = std::max((ubyte)1, ctx->framebuffer.samples); 772 773 ctx->dirty |= SWR_NEW_FRAMEBUFFER; 774 } 775} 776 777 778static void 779swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) 780{ 781 struct swr_context *ctx = swr_context(pipe); 782 783 if (sample_mask != ctx->sample_mask) { 784 ctx->sample_mask = sample_mask; 785 ctx->dirty |= SWR_NEW_RASTERIZER; 786 } 787} 788 789/* 790 * MSAA fixed sample position table 791 * used by update_derived and get_sample_position 792 * (integer locations on a 16x16 grid) 793 */ 794static const uint8_t swr_sample_positions[][2] = 795{ /* 1x*/ { 8, 8}, 796 /* 2x*/ {12,12},{ 4, 4}, 797 /* 4x*/ { 6, 2},{14, 6},{ 2,10},{10,14}, 798 /* 8x*/ { 9, 5},{ 7,11},{13, 9},{ 5, 3}, 799 { 3,13},{ 1, 7},{11,15},{15, 1}, 800 /*16x*/ { 9, 9},{ 7, 5},{ 5,10},{12, 7}, 801 { 3, 6},{10,13},{13,11},{11, 3}, 802 { 6,14},{ 8, 1},{ 4, 2},{ 2,12}, 803 { 0, 8},{15, 4},{14,15},{ 1, 0} }; 804 805static void 806swr_get_sample_position(struct pipe_context *pipe, 807 unsigned sample_count, unsigned sample_index, 808 float *out_value) 809{ 810 /* validate sample_count */ 811 sample_count = GetNumSamples(GetSampleCount(sample_count)); 812 813 const uint8_t *sample = swr_sample_positions[sample_count-1 + sample_index]; 814 out_value[0] = sample[0] / 16.0f; 815 out_value[1] = sample[1] / 16.0f; 816} 817 818 819/* 820 * Update resource in-use status 821 * All resources bound to color or depth targets marked as WRITE resources. 822 * VBO Vertex/index buffers and texture views marked as READ resources. 823 */ 824void 825swr_update_resource_status(struct pipe_context *pipe, 826 const struct pipe_draw_info *p_draw_info) 827{ 828 struct swr_context *ctx = swr_context(pipe); 829 struct pipe_framebuffer_state *fb = &ctx->framebuffer; 830 831 /* colorbuffer targets */ 832 if (fb->nr_cbufs) 833 for (uint32_t i = 0; i < fb->nr_cbufs; ++i) 834 if (fb->cbufs[i]) 835 swr_resource_write(fb->cbufs[i]->texture); 836 837 /* depth/stencil target */ 838 if (fb->zsbuf) 839 swr_resource_write(fb->zsbuf->texture); 840 841 /* VBO vertex buffers */ 842 for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) { 843 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; 844 if (!vb->is_user_buffer && vb->buffer.resource) 845 swr_resource_read(vb->buffer.resource); 846 } 847 848 /* VBO index buffer */ 849 if (p_draw_info && p_draw_info->index_size) { 850 if (!p_draw_info->has_user_indices) 851 swr_resource_read(p_draw_info->index.resource); 852 } 853 854 /* transform feedback buffers */ 855 for (uint32_t i = 0; i < ctx->num_so_targets; i++) { 856 struct pipe_stream_output_target *target = ctx->so_targets[i]; 857 if (target && target->buffer) 858 swr_resource_write(target->buffer); 859 } 860 861 /* texture sampler views */ 862 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) { 863 for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) { 864 struct pipe_sampler_view *view = ctx->sampler_views[j][i]; 865 if (view) 866 swr_resource_read(view->texture); 867 } 868 } 869 870 /* constant buffers */ 871 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) { 872 for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 873 struct pipe_constant_buffer *cb = &ctx->constants[j][i]; 874 if (cb->buffer) 875 swr_resource_read(cb->buffer); 876 } 877 } 878} 879 880static void 881swr_update_texture_state(struct swr_context *ctx, 882 enum pipe_shader_type shader_type, 883 unsigned num_sampler_views, 884 swr_jit_texture *textures) 885{ 886 for (unsigned i = 0; i < num_sampler_views; i++) { 887 struct pipe_sampler_view *view = 888 ctx->sampler_views[shader_type][i]; 889 struct swr_jit_texture *jit_tex = &textures[i]; 890 891 memset(jit_tex, 0, sizeof(*jit_tex)); 892 if (view) { 893 struct pipe_resource *res = view->texture; 894 struct swr_resource *swr_res = swr_resource(res); 895 SWR_SURFACE_STATE *swr = &swr_res->swr; 896 size_t *mip_offsets = swr_res->mip_offsets; 897 if (swr_res->has_depth && swr_res->has_stencil && 898 !util_format_has_depth(util_format_description(view->format))) { 899 swr = &swr_res->secondary; 900 mip_offsets = swr_res->secondary_mip_offsets; 901 } 902 903 jit_tex->width = res->width0; 904 jit_tex->height = res->height0; 905 jit_tex->base_ptr = (uint8_t*)swr->xpBaseAddress; 906 jit_tex->num_samples = swr->numSamples; 907 jit_tex->sample_stride = 0; 908 if (view->target != PIPE_BUFFER) { 909 jit_tex->first_level = view->u.tex.first_level; 910 jit_tex->last_level = view->u.tex.last_level; 911 if (view->target == PIPE_TEXTURE_3D) 912 jit_tex->depth = res->depth0; 913 else 914 jit_tex->depth = 915 view->u.tex.last_layer - view->u.tex.first_layer + 1; 916 jit_tex->base_ptr += view->u.tex.first_layer * 917 swr->qpitch * swr->pitch; 918 } else { 919 unsigned view_blocksize = util_format_get_blocksize(view->format); 920 jit_tex->base_ptr += view->u.buf.offset; 921 jit_tex->width = view->u.buf.size / view_blocksize; 922 jit_tex->depth = 1; 923 } 924 925 for (unsigned level = jit_tex->first_level; 926 level <= jit_tex->last_level; 927 level++) { 928 jit_tex->row_stride[level] = swr->pitch; 929 jit_tex->img_stride[level] = swr->qpitch * swr->pitch; 930 jit_tex->mip_offsets[level] = mip_offsets[level]; 931 } 932 } 933 } 934} 935 936static void 937swr_update_sampler_state(struct swr_context *ctx, 938 enum pipe_shader_type shader_type, 939 unsigned num_samplers, 940 swr_jit_sampler *samplers) 941{ 942 for (unsigned i = 0; i < num_samplers; i++) { 943 const struct pipe_sampler_state *sampler = 944 ctx->samplers[shader_type][i]; 945 946 if (sampler) { 947 samplers[i].min_lod = sampler->min_lod; 948 samplers[i].max_lod = sampler->max_lod; 949 samplers[i].lod_bias = sampler->lod_bias; 950 COPY_4V(samplers[i].border_color, sampler->border_color.f); 951 } 952 } 953} 954 955static void 956swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType) 957{ 958 swr_draw_context *pDC = &ctx->swrDC; 959 960 const float **constant; 961 uint32_t *num_constants; 962 struct swr_scratch_space *scratch; 963 964 switch (shaderType) { 965 case PIPE_SHADER_VERTEX: 966 constant = pDC->constantVS; 967 num_constants = pDC->num_constantsVS; 968 scratch = &ctx->scratch->vs_constants; 969 break; 970 case PIPE_SHADER_FRAGMENT: 971 constant = pDC->constantFS; 972 num_constants = pDC->num_constantsFS; 973 scratch = &ctx->scratch->fs_constants; 974 break; 975 case PIPE_SHADER_GEOMETRY: 976 constant = pDC->constantGS; 977 num_constants = pDC->num_constantsGS; 978 scratch = &ctx->scratch->gs_constants; 979 break; 980 case PIPE_SHADER_TESS_CTRL: 981 constant = pDC->constantTCS; 982 num_constants = pDC->num_constantsTCS; 983 scratch = &ctx->scratch->tcs_constants; 984 break; 985 case PIPE_SHADER_TESS_EVAL: 986 constant = pDC->constantTES; 987 num_constants = pDC->num_constantsTES; 988 scratch = &ctx->scratch->tes_constants; 989 break; 990 default: 991 assert(0 && "Unsupported shader type constants"); 992 return; 993 } 994 995 for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 996 const pipe_constant_buffer *cb = &ctx->constants[shaderType][i]; 997 num_constants[i] = cb->buffer_size; 998 if (cb->buffer) { 999 constant[i] = 1000 (const float *)(swr_resource_data(cb->buffer) + 1001 cb->buffer_offset); 1002 } else { 1003 /* Need to copy these constants to scratch space */ 1004 if (cb->user_buffer && cb->buffer_size) { 1005 const void *ptr = 1006 ((const uint8_t *)cb->user_buffer + cb->buffer_offset); 1007 uint32_t size = AlignUp(cb->buffer_size, 4); 1008 ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size); 1009 constant[i] = (const float *)ptr; 1010 } 1011 } 1012 } 1013} 1014 1015static bool 1016swr_change_rt(struct swr_context *ctx, 1017 unsigned attachment, 1018 const struct pipe_surface *sf) 1019{ 1020 swr_draw_context *pDC = &ctx->swrDC; 1021 struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment]; 1022 1023 /* Do nothing if the render target hasn't changed */ 1024 if ((!sf || !sf->texture) && (void*)(rt->xpBaseAddress) == nullptr) 1025 return false; 1026 1027 /* Deal with disabling RT up front */ 1028 if (!sf || !sf->texture) { 1029 /* If detaching attachment, mark tiles as RESOLVED so core 1030 * won't try to load from non-existent target. */ 1031 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED); 1032 *rt = {0}; 1033 return true; 1034 } 1035 1036 const struct swr_resource *swr = swr_resource(sf->texture); 1037 const SWR_SURFACE_STATE *swr_surface = &swr->swr; 1038 SWR_FORMAT fmt = mesa_to_swr_format(sf->format); 1039 1040 if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.xpBaseAddress) { 1041 swr_surface = &swr->secondary; 1042 fmt = swr_surface->format; 1043 } 1044 1045 if (rt->xpBaseAddress == swr_surface->xpBaseAddress && 1046 rt->format == fmt && 1047 rt->lod == sf->u.tex.level && 1048 rt->arrayIndex == sf->u.tex.first_layer) 1049 return false; 1050 1051 bool need_fence = false; 1052 1053 /* StoreTile for changed target */ 1054 if (rt->xpBaseAddress) { 1055 /* If changing attachment to a new target, mark tiles as 1056 * INVALID so they are reloaded from surface. */ 1057 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID); 1058 need_fence = true; 1059 } else { 1060 /* if no previous attachment, invalidate tiles that may be marked 1061 * RESOLVED because of an old attachment */ 1062 swr_invalidate_render_target(&ctx->pipe, attachment, sf->width, sf->height); 1063 /* no need to set fence here */ 1064 } 1065 1066 /* Make new attachment */ 1067 *rt = *swr_surface; 1068 rt->format = fmt; 1069 rt->lod = sf->u.tex.level; 1070 rt->arrayIndex = sf->u.tex.first_layer; 1071 1072 return need_fence; 1073} 1074 1075/* 1076 * for cases where resources are shared between contexts, invalidate 1077 * this ctx's resource. so it can be fetched fresh. Old ctx's resource 1078 * is already stored during a flush 1079 */ 1080static inline void 1081swr_invalidate_buffers_after_ctx_change(struct pipe_context *pipe) 1082{ 1083 struct swr_context *ctx = swr_context(pipe); 1084 1085 for (uint32_t i = 0; i < ctx->framebuffer.nr_cbufs; i++) { 1086 struct pipe_surface *cb = ctx->framebuffer.cbufs[i]; 1087 if (cb) { 1088 struct swr_resource *res = swr_resource(cb->texture); 1089 if (res->curr_pipe != pipe) { 1090 /* if curr_pipe is NULL (first use), status should not be WRITE */ 1091 assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE)); 1092 if (res->status & SWR_RESOURCE_WRITE) { 1093 swr_invalidate_render_target(pipe, i, cb->width, cb->height); 1094 } 1095 } 1096 res->curr_pipe = pipe; 1097 } 1098 } 1099 if (ctx->framebuffer.zsbuf) { 1100 struct pipe_surface *zb = ctx->framebuffer.zsbuf; 1101 if (zb) { 1102 struct swr_resource *res = swr_resource(zb->texture); 1103 if (res->curr_pipe != pipe) { 1104 /* if curr_pipe is NULL (first use), status should not be WRITE */ 1105 assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE)); 1106 if (res->status & SWR_RESOURCE_WRITE) { 1107 swr_invalidate_render_target(pipe, SWR_ATTACHMENT_DEPTH, zb->width, zb->height); 1108 swr_invalidate_render_target(pipe, SWR_ATTACHMENT_STENCIL, zb->width, zb->height); 1109 } 1110 } 1111 res->curr_pipe = pipe; 1112 } 1113 } 1114} 1115 1116static inline void 1117swr_user_vbuf_range(const struct pipe_draw_info *info, 1118 const struct swr_vertex_element_state *velems, 1119 const struct pipe_vertex_buffer *vb, 1120 uint32_t i, 1121 uint32_t *totelems, 1122 uint32_t *base, 1123 uint32_t *size, 1124 int index_bias) 1125{ 1126 /* FIXME: The size is too large - we don't access the full extra stride. */ 1127 unsigned elems; 1128 unsigned elem_pitch = vb->stride + velems->stream_pitch[i]; 1129 if (velems->instanced_bufs & (1U << i)) { 1130 elems = info->instance_count / velems->min_instance_div[i] + 1; 1131 *totelems = info->start_instance + elems; 1132 *base = info->start_instance * vb->stride; 1133 *size = elems * elem_pitch; 1134 } else if (vb->stride) { 1135 elems = info->max_index - info->min_index + 1; 1136 *totelems = (info->max_index + (info->index_size ? index_bias : 0)) + 1; 1137 *base = (info->min_index + (info->index_size ? index_bias : 0)) * vb->stride; 1138 *size = elems * elem_pitch; 1139 } else { 1140 *totelems = 1; 1141 *base = 0; 1142 *size = velems->stream_pitch[i]; 1143 } 1144} 1145 1146static void 1147swr_update_poly_stipple(struct swr_context *ctx) 1148{ 1149 struct swr_draw_context *pDC = &ctx->swrDC; 1150 1151 assert(sizeof(ctx->poly_stipple.pipe.stipple) == sizeof(pDC->polyStipple)); 1152 memcpy(pDC->polyStipple, 1153 ctx->poly_stipple.pipe.stipple, 1154 sizeof(ctx->poly_stipple.pipe.stipple)); 1155} 1156 1157 1158static struct tgsi_shader_info * 1159swr_get_last_fe(const struct swr_context *ctx) 1160{ 1161 tgsi_shader_info *pLastFE = &ctx->vs->info.base; 1162 1163 if (ctx->gs) { 1164 pLastFE = &ctx->gs->info.base; 1165 } 1166 else if (ctx->tes) { 1167 pLastFE = &ctx->tes->info.base; 1168 } 1169 else if (ctx->tcs) { 1170 pLastFE = &ctx->tcs->info.base; 1171 } 1172 return pLastFE; 1173} 1174 1175 1176void 1177swr_update_derived(struct pipe_context *pipe, 1178 const struct pipe_draw_info *p_draw_info, 1179 const struct pipe_draw_start_count_bias *draw) 1180{ 1181 struct swr_context *ctx = swr_context(pipe); 1182 struct swr_screen *screen = swr_screen(pipe->screen); 1183 1184 /* When called from swr_clear (p_draw_info = null), set any null 1185 * state-objects to the dummy state objects to prevent nullptr dereference 1186 * in validation below. 1187 * 1188 * Important that this remains static for zero initialization. These 1189 * aren't meant to be proper state objects, just empty structs. They will 1190 * not be written to. 1191 * 1192 * Shaders can't be part of the union since they contain std::unordered_map 1193 */ 1194 static struct { 1195 union { 1196 struct pipe_rasterizer_state rasterizer; 1197 struct pipe_depth_stencil_alpha_state depth_stencil; 1198 struct swr_blend_state blend; 1199 } state; 1200 struct swr_vertex_shader vs; 1201 struct swr_fragment_shader fs; 1202 } swr_dummy; 1203 1204 if (!p_draw_info) { 1205 if (!ctx->rasterizer) 1206 ctx->rasterizer = &swr_dummy.state.rasterizer; 1207 if (!ctx->depth_stencil) 1208 ctx->depth_stencil = &swr_dummy.state.depth_stencil; 1209 if (!ctx->blend) 1210 ctx->blend = &swr_dummy.state.blend; 1211 if (!ctx->vs) 1212 ctx->vs = &swr_dummy.vs; 1213 if (!ctx->fs) 1214 ctx->fs = &swr_dummy.fs; 1215 } 1216 1217 /* Update screen->pipe to current pipe context. */ 1218 screen->pipe = pipe; 1219 1220 /* Any state that requires dirty flags to be re-triggered sets this mask */ 1221 /* For example, user_buffer vertex and index buffers. */ 1222 unsigned post_update_dirty_flags = 0; 1223 1224 /* bring resources that changed context up-to-date */ 1225 swr_invalidate_buffers_after_ctx_change(pipe); 1226 1227 /* Render Targets */ 1228 if (ctx->dirty & SWR_NEW_FRAMEBUFFER) { 1229 struct pipe_framebuffer_state *fb = &ctx->framebuffer; 1230 const struct util_format_description *desc = NULL; 1231 bool need_fence = false; 1232 1233 /* colorbuffer targets */ 1234 if (fb->nr_cbufs) { 1235 for (unsigned i = 0; i < fb->nr_cbufs; ++i) 1236 need_fence |= swr_change_rt( 1237 ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]); 1238 } 1239 for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i) 1240 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL); 1241 1242 /* depth/stencil target */ 1243 if (fb->zsbuf) 1244 desc = util_format_description(fb->zsbuf->format); 1245 if (fb->zsbuf && util_format_has_depth(desc)) 1246 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf); 1247 else 1248 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL); 1249 1250 if (fb->zsbuf && util_format_has_stencil(desc)) 1251 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf); 1252 else 1253 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL); 1254 1255 /* This fence ensures any attachment changes are resolved before the 1256 * next draw */ 1257 if (need_fence) 1258 swr_fence_submit(ctx, screen->flush_fence); 1259 } 1260 1261 /* Raster state */ 1262 if (ctx->dirty & (SWR_NEW_RASTERIZER | 1263 SWR_NEW_VS | // clipping 1264 SWR_NEW_TES | 1265 SWR_NEW_TCS | 1266 SWR_NEW_FRAMEBUFFER)) { 1267 pipe_rasterizer_state *rasterizer = ctx->rasterizer; 1268 pipe_framebuffer_state *fb = &ctx->framebuffer; 1269 1270 SWR_RASTSTATE *rastState = &ctx->derived.rastState; 1271 rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face); 1272 rastState->frontWinding = rasterizer->front_ccw 1273 ? SWR_FRONTWINDING_CCW 1274 : SWR_FRONTWINDING_CW; 1275 rastState->scissorEnable = rasterizer->scissor; 1276 rastState->pointSize = rasterizer->point_size > 0.0f 1277 ? rasterizer->point_size 1278 : 1.0f; 1279 rastState->lineWidth = rasterizer->line_width > 0.0f 1280 ? rasterizer->line_width 1281 : 1.0f; 1282 1283 rastState->pointParam = rasterizer->point_size_per_vertex; 1284 1285 rastState->pointSpriteEnable = rasterizer->sprite_coord_enable; 1286 rastState->pointSpriteTopOrigin = 1287 rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT; 1288 1289 /* If SWR_MSAA_FORCE_ENABLE is set, turn msaa on */ 1290 if (screen->msaa_force_enable && !rasterizer->multisample) { 1291 /* Force enable and use the value the surface was created with */ 1292 rasterizer->multisample = true; 1293 fb->samples = swr_resource(fb->cbufs[0]->texture)->swr.numSamples; 1294 fprintf(stderr,"msaa force enable: %d samples\n", fb->samples); 1295 } 1296 1297 rastState->sampleCount = GetSampleCount(fb->samples); 1298 rastState->forcedSampleCount = false; 1299 rastState->bIsCenterPattern = !rasterizer->multisample; 1300 rastState->pixelLocation = SWR_PIXEL_LOCATION_CENTER; 1301 1302 /* Only initialize sample positions if msaa is enabled */ 1303 if (rasterizer->multisample) { 1304 for (uint32_t i = 0; i < fb->samples; i++) { 1305 const uint8_t *sample = swr_sample_positions[fb->samples-1 + i]; 1306 rastState->samplePositions.SetXi(i, sample[0] << 4); 1307 rastState->samplePositions.SetYi(i, sample[1] << 4); 1308 rastState->samplePositions.SetX (i, sample[0] / 16.0f); 1309 rastState->samplePositions.SetY (i, sample[1] / 16.0f); 1310 } 1311 rastState->samplePositions.PrecalcSampleData(fb->samples); 1312 } 1313 1314 bool do_offset = false; 1315 switch (rasterizer->fill_front) { 1316 case PIPE_POLYGON_MODE_FILL: 1317 do_offset = rasterizer->offset_tri; 1318 break; 1319 case PIPE_POLYGON_MODE_LINE: 1320 do_offset = rasterizer->offset_line; 1321 break; 1322 case PIPE_POLYGON_MODE_POINT: 1323 do_offset = rasterizer->offset_point; 1324 break; 1325 } 1326 1327 if (do_offset) { 1328 rastState->depthBias = rasterizer->offset_units; 1329 rastState->slopeScaledDepthBias = rasterizer->offset_scale; 1330 rastState->depthBiasClamp = rasterizer->offset_clamp; 1331 } else { 1332 rastState->depthBias = 0; 1333 rastState->slopeScaledDepthBias = 0; 1334 rastState->depthBiasClamp = 0; 1335 } 1336 1337 /* translate polygon mode, at least for the front==back case */ 1338 rastState->fillMode = swr_convert_fill_mode(rasterizer->fill_front); 1339 1340 struct pipe_surface *zb = fb->zsbuf; 1341 if (zb && swr_resource(zb->texture)->has_depth) 1342 rastState->depthFormat = swr_resource(zb->texture)->swr.format; 1343 1344 rastState->depthClipEnable = rasterizer->depth_clip_near; 1345 rastState->clipEnable = rasterizer->depth_clip_near | rasterizer->depth_clip_far; 1346 rastState->clipHalfZ = rasterizer->clip_halfz; 1347 1348 ctx->api.pfnSwrSetRastState(ctx->swrContext, rastState); 1349 } 1350 1351 /* Viewport */ 1352 if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER 1353 | SWR_NEW_RASTERIZER)) { 1354 pipe_viewport_state *state = &ctx->viewports[0]; 1355 pipe_framebuffer_state *fb = &ctx->framebuffer; 1356 pipe_rasterizer_state *rasterizer = ctx->rasterizer; 1357 1358 SWR_VIEWPORT *vp = &ctx->derived.vp[0]; 1359 SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm; 1360 1361 for (unsigned i = 0; i < KNOB_NUM_VIEWPORTS_SCISSORS; i++) { 1362 vp->x = state->translate[0] - state->scale[0]; 1363 vp->width = 2 * state->scale[0]; 1364 vp->y = state->translate[1] - fabs(state->scale[1]); 1365 vp->height = 2 * fabs(state->scale[1]); 1366 util_viewport_zmin_zmax(state, rasterizer->clip_halfz, 1367 &vp->minZ, &vp->maxZ); 1368 1369 if (rasterizer->depth_clip_near) { 1370 vp->minZ = 0.0f; 1371 } 1372 1373 if (rasterizer->depth_clip_far) { 1374 vp->maxZ = 1.0f; 1375 } 1376 1377 vpm->m00[i] = state->scale[0]; 1378 vpm->m11[i] = state->scale[1]; 1379 vpm->m22[i] = state->scale[2]; 1380 vpm->m30[i] = state->translate[0]; 1381 vpm->m31[i] = state->translate[1]; 1382 vpm->m32[i] = state->translate[2]; 1383 1384 /* Now that the matrix is calculated, clip the view coords to screen 1385 * size. OpenGL allows for -ve x,y in the viewport. */ 1386 if (vp->x < 0.0f) { 1387 vp->width += vp->x; 1388 vp->x = 0.0f; 1389 } 1390 if (vp->y < 0.0f) { 1391 vp->height += vp->y; 1392 vp->y = 0.0f; 1393 } 1394 vp->width = std::min(vp->width, (float) fb->width - vp->x); 1395 vp->height = std::min(vp->height, (float) fb->height - vp->y); 1396 1397 vp++; 1398 state++; 1399 } 1400 ctx->api.pfnSwrSetViewports(ctx->swrContext, KNOB_NUM_VIEWPORTS_SCISSORS, 1401 &ctx->derived.vp[0], &ctx->derived.vpm); 1402 } 1403 1404 /* When called from swr_clear (p_draw_info = null), render targets, 1405 * rasterState and viewports (dependent on render targets) are the only 1406 * necessary validation. Defer remaining validation by setting 1407 * post_update_dirty_flags and clear all dirty flags. BackendState is 1408 * still unconditionally validated below */ 1409 if (!p_draw_info) { 1410 post_update_dirty_flags = ctx->dirty & ~(SWR_NEW_FRAMEBUFFER | 1411 SWR_NEW_RASTERIZER | 1412 SWR_NEW_VIEWPORT); 1413 ctx->dirty = 0; 1414 } 1415 1416 /* Scissor */ 1417 if (ctx->dirty & SWR_NEW_SCISSOR) { 1418 ctx->api.pfnSwrSetScissorRects(ctx->swrContext, KNOB_NUM_VIEWPORTS_SCISSORS, ctx->swr_scissors); 1419 } 1420 1421 /* Set vertex & index buffers */ 1422 if (ctx->dirty & SWR_NEW_VERTEX) { 1423 const struct pipe_draw_info &info = *p_draw_info; 1424 1425 /* vertex buffers */ 1426 SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; 1427 for (UINT i = 0; i < ctx->num_vertex_buffers; i++) { 1428 uint32_t size = 0, pitch = 0, elems = 0, partial_inbounds = 0; 1429 uint32_t min_vertex_index = 0; 1430 const uint8_t *p_data; 1431 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; 1432 1433 pitch = vb->stride; 1434 if (vb->is_user_buffer) { 1435 /* Client buffer 1436 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to 1437 * revalidate on each draw */ 1438 post_update_dirty_flags |= SWR_NEW_VERTEX; 1439 1440 uint32_t base; 1441 swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size, draw->index_bias); 1442 partial_inbounds = 0; 1443 min_vertex_index = info.min_index + (info.index_size ? draw->index_bias : 0); 1444 1445 size = AlignUp(size, 4); 1446 /* If size of client memory copy is too large, don't copy. The 1447 * draw will access user-buffer directly and then block. This is 1448 * faster than queuing many large client draws. */ 1449 if (size >= screen->client_copy_limit) { 1450 post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW; 1451 p_data = (const uint8_t *) vb->buffer.user; 1452 } else { 1453 /* Copy only needed vertices to scratch space */ 1454 const void *ptr = (const uint8_t *) vb->buffer.user + base; 1455 ptr = (uint8_t *)swr_copy_to_scratch_space( 1456 ctx, &ctx->scratch->vertex_buffer, ptr, size); 1457 p_data = (const uint8_t *)ptr - base; 1458 } 1459 } else if (vb->buffer.resource) { 1460 /* VBO */ 1461 if (!pitch) { 1462 /* If pitch=0 (ie vb->stride), buffer contains a single 1463 * constant attribute. Use the stream_pitch which was 1464 * calculated during creation of vertex_elements_state for the 1465 * size of the attribute. */ 1466 size = ctx->velems->stream_pitch[i]; 1467 elems = 1; 1468 partial_inbounds = 0; 1469 min_vertex_index = 0; 1470 } else { 1471 /* size is based on buffer->width0 rather than info.max_index 1472 * to prevent having to validate VBO on each draw. */ 1473 size = vb->buffer.resource->width0; 1474 elems = size / pitch; 1475 partial_inbounds = size % pitch; 1476 min_vertex_index = 0; 1477 } 1478 1479 p_data = swr_resource_data(vb->buffer.resource) + vb->buffer_offset; 1480 } else 1481 p_data = NULL; 1482 1483 swrVertexBuffers[i] = {0}; 1484 swrVertexBuffers[i].index = i; 1485 swrVertexBuffers[i].pitch = pitch; 1486 swrVertexBuffers[i].xpData = (gfxptr_t) p_data; 1487 swrVertexBuffers[i].size = size; 1488 swrVertexBuffers[i].minVertex = min_vertex_index; 1489 swrVertexBuffers[i].maxVertex = elems; 1490 swrVertexBuffers[i].partialInboundsSize = partial_inbounds; 1491 } 1492 1493 ctx->api.pfnSwrSetVertexBuffers( 1494 ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers); 1495 1496 /* index buffer, if required (info passed in by swr_draw_vbo) */ 1497 SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */ 1498 if (info.index_size) { 1499 const uint8_t *p_data; 1500 uint32_t size, pitch; 1501 1502 pitch = info.index_size ? info.index_size : sizeof(uint32_t); 1503 index_type = swr_convert_index_type(pitch); 1504 1505 if (!info.has_user_indices) { 1506 /* VBO 1507 * size is based on buffer->width0 rather than info.count 1508 * to prevent having to validate VBO on each draw */ 1509 size = info.index.resource->width0; 1510 p_data = swr_resource_data(info.index.resource); 1511 } else { 1512 /* Client buffer 1513 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to 1514 * revalidate on each draw */ 1515 post_update_dirty_flags |= SWR_NEW_VERTEX; 1516 1517 size = draw->count * pitch; 1518 1519 size = AlignUp(size, 4); 1520 /* If size of client memory copy is too large, don't copy. The 1521 * draw will access user-buffer directly and then block. This is 1522 * faster than queuing many large client draws. */ 1523 if (size >= screen->client_copy_limit) { 1524 post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW; 1525 p_data = (const uint8_t *) info.index.user + 1526 draw->start * info.index_size; 1527 } else { 1528 /* Copy indices to scratch space */ 1529 const void *ptr = (char*)info.index.user + 1530 draw->start * info.index_size; 1531 ptr = swr_copy_to_scratch_space( 1532 ctx, &ctx->scratch->index_buffer, ptr, size); 1533 p_data = (const uint8_t *)ptr; 1534 } 1535 } 1536 1537 SWR_INDEX_BUFFER_STATE swrIndexBuffer; 1538 swrIndexBuffer.format = swr_convert_index_type(info.index_size); 1539 swrIndexBuffer.xpIndices = (gfxptr_t) p_data; 1540 swrIndexBuffer.size = size; 1541 1542 ctx->api.pfnSwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer); 1543 } 1544 1545 struct swr_vertex_element_state *velems = ctx->velems; 1546 if (velems && velems->fsState.indexType != index_type) { 1547 velems->fsFunc = NULL; 1548 velems->fsState.indexType = index_type; 1549 } 1550 } 1551 1552 /* GeometryShader */ 1553 if (ctx->dirty & (SWR_NEW_GS | 1554 SWR_NEW_VS | 1555 SWR_NEW_TCS | 1556 SWR_NEW_TES | 1557 SWR_NEW_SAMPLER | 1558 SWR_NEW_SAMPLER_VIEW)) { 1559 if (ctx->gs) { 1560 swr_jit_gs_key key; 1561 swr_generate_gs_key(key, ctx, ctx->gs); 1562 auto search = ctx->gs->map.find(key); 1563 PFN_GS_FUNC func; 1564 if (search != ctx->gs->map.end()) { 1565 func = search->second->shader; 1566 } else { 1567 func = swr_compile_gs(ctx, key); 1568 } 1569 ctx->api.pfnSwrSetGsFunc(ctx->swrContext, func); 1570 1571 /* JIT sampler state */ 1572 if (ctx->dirty & SWR_NEW_SAMPLER) { 1573 swr_update_sampler_state(ctx, 1574 PIPE_SHADER_GEOMETRY, 1575 key.nr_samplers, 1576 ctx->swrDC.samplersGS); 1577 } 1578 1579 /* JIT sampler view state */ 1580 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { 1581 swr_update_texture_state(ctx, 1582 PIPE_SHADER_GEOMETRY, 1583 key.nr_sampler_views, 1584 ctx->swrDC.texturesGS); 1585 } 1586 1587 ctx->api.pfnSwrSetGsState(ctx->swrContext, &ctx->gs->gsState); 1588 } else { 1589 SWR_GS_STATE state = { 0 }; 1590 ctx->api.pfnSwrSetGsState(ctx->swrContext, &state); 1591 ctx->api.pfnSwrSetGsFunc(ctx->swrContext, NULL); 1592 } 1593 } 1594 1595 // We may need to restore tessellation state 1596 // This restored state may be however overwritten 1597 // during shader compilation 1598 if (ctx->dirty & SWR_NEW_TS) { 1599 if (ctx->tes != nullptr) { 1600 ctx->tsState = ctx->tes->ts_state; 1601 ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState); 1602 } else { 1603 SWR_TS_STATE state = { 0 }; 1604 ctx->api.pfnSwrSetTsState(ctx->swrContext, &state); 1605 } 1606 } 1607 1608 // Tessellation Evaluation Shader 1609 // Compile TES first, because TCS is optional 1610 if (ctx->dirty & (SWR_NEW_GS | 1611 SWR_NEW_VS | 1612 SWR_NEW_TCS | 1613 SWR_NEW_TES | 1614 SWR_NEW_SAMPLER | 1615 SWR_NEW_SAMPLER_VIEW)) { 1616 if (ctx->tes) { 1617 swr_jit_tes_key key; 1618 swr_generate_tes_key(key, ctx, ctx->tes); 1619 1620 auto search = ctx->tes->map.find(key); 1621 PFN_TES_FUNC func; 1622 if (search != ctx->tes->map.end()) { 1623 func = search->second->shader; 1624 } else { 1625 func = swr_compile_tes(ctx, key); 1626 } 1627 1628 ctx->api.pfnSwrSetDsFunc(ctx->swrContext, func); 1629 1630 /* JIT sampler state */ 1631 if (ctx->dirty & SWR_NEW_SAMPLER) { 1632 swr_update_sampler_state(ctx, 1633 PIPE_SHADER_TESS_EVAL, 1634 key.nr_samplers, 1635 ctx->swrDC.samplersTES); 1636 } 1637 1638 /* JIT sampler view state */ 1639 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { 1640 swr_update_texture_state(ctx, 1641 PIPE_SHADER_TESS_EVAL, 1642 key.nr_sampler_views, 1643 ctx->swrDC.texturesTES); 1644 } 1645 1646 // Update tessellation state in case it's been updated 1647 ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState); 1648 } else { 1649 ctx->api.pfnSwrSetDsFunc(ctx->swrContext, NULL); 1650 } 1651 } 1652 1653 /* Tessellation Control Shader */ 1654 if (ctx->dirty & (SWR_NEW_GS | 1655 SWR_NEW_VS | 1656 SWR_NEW_TCS | 1657 SWR_NEW_TES | 1658 SWR_NEW_SAMPLER | 1659 SWR_NEW_SAMPLER_VIEW)) { 1660 if (ctx->tcs) { 1661 ctx->tcs->vertices_per_patch = ctx->patch_vertices; 1662 1663 swr_jit_tcs_key key; 1664 swr_generate_tcs_key(key, ctx, ctx->tcs); 1665 1666 auto search = ctx->tcs->map.find(key); 1667 PFN_TCS_FUNC func; 1668 if (search != ctx->tcs->map.end()) { 1669 func = search->second->shader; 1670 } else { 1671 func = swr_compile_tcs(ctx, key); 1672 } 1673 1674 ctx->api.pfnSwrSetHsFunc(ctx->swrContext, func); 1675 1676 /* JIT sampler state */ 1677 if (ctx->dirty & SWR_NEW_SAMPLER) { 1678 swr_update_sampler_state(ctx, 1679 PIPE_SHADER_TESS_CTRL, 1680 key.nr_samplers, 1681 ctx->swrDC.samplersTCS); 1682 } 1683 1684 /* JIT sampler view state */ 1685 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { 1686 swr_update_texture_state(ctx, 1687 PIPE_SHADER_TESS_CTRL, 1688 key.nr_sampler_views, 1689 ctx->swrDC.texturesTCS); 1690 } 1691 1692 // Update tessellation state in case it's been updated 1693 ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState); 1694 } else { 1695 ctx->api.pfnSwrSetHsFunc(ctx->swrContext, NULL); 1696 } 1697 } 1698 1699 /* VertexShader */ 1700 if (ctx->dirty 1701 & (SWR_NEW_VS | SWR_NEW_RASTERIZER | // for clip planes 1702 SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { 1703 swr_jit_vs_key key; 1704 swr_generate_vs_key(key, ctx, ctx->vs); 1705 auto search = ctx->vs->map.find(key); 1706 PFN_VERTEX_FUNC func; 1707 if (search != ctx->vs->map.end()) { 1708 func = search->second->shader; 1709 } else { 1710 func = swr_compile_vs(ctx, key); 1711 } 1712 ctx->api.pfnSwrSetVertexFunc(ctx->swrContext, func); 1713 1714 /* JIT sampler state */ 1715 if (ctx->dirty & SWR_NEW_SAMPLER) { 1716 swr_update_sampler_state( 1717 ctx, PIPE_SHADER_VERTEX, key.nr_samplers, ctx->swrDC.samplersVS); 1718 } 1719 1720 /* JIT sampler view state */ 1721 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { 1722 swr_update_texture_state(ctx, 1723 PIPE_SHADER_VERTEX, 1724 key.nr_sampler_views, 1725 ctx->swrDC.texturesVS); 1726 } 1727 } 1728 1729 /* work around the fact that poly stipple also affects lines */ 1730 /* and points, since we rasterize them as triangles, too */ 1731 /* Has to be before fragment shader, since it sets SWR_NEW_FS */ 1732 if (p_draw_info) { 1733 bool new_prim_is_poly = 1734 (u_reduced_prim((enum pipe_prim_type)p_draw_info->mode) == PIPE_PRIM_TRIANGLES) && 1735 (ctx->derived.rastState.fillMode == SWR_FILLMODE_SOLID); 1736 if (new_prim_is_poly != ctx->poly_stipple.prim_is_poly) { 1737 ctx->dirty |= SWR_NEW_FS; 1738 ctx->poly_stipple.prim_is_poly = new_prim_is_poly; 1739 } 1740 } 1741 1742 /* FragmentShader */ 1743 if (ctx->dirty & (SWR_NEW_FS | 1744 SWR_NEW_VS | 1745 SWR_NEW_GS | 1746 SWR_NEW_TES | 1747 SWR_NEW_TCS | 1748 SWR_NEW_RASTERIZER | 1749 SWR_NEW_SAMPLER | 1750 SWR_NEW_SAMPLER_VIEW | 1751 SWR_NEW_FRAMEBUFFER)) { 1752 swr_jit_fs_key key; 1753 swr_generate_fs_key(key, ctx, ctx->fs); 1754 auto search = ctx->fs->map.find(key); 1755 PFN_PIXEL_KERNEL func; 1756 if (search != ctx->fs->map.end()) { 1757 func = search->second->shader; 1758 } else { 1759 func = swr_compile_fs(ctx, key); 1760 } 1761 SWR_PS_STATE psState = {0}; 1762 psState.pfnPixelShader = func; 1763 psState.killsPixel = ctx->fs->info.base.uses_kill; 1764 psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL; 1765 psState.writesODepth = ctx->fs->info.base.writes_z; 1766 psState.usesSourceDepth = ctx->fs->info.base.reads_z; 1767 psState.shadingRate = SWR_SHADING_RATE_PIXEL; 1768 psState.renderTargetMask = (1 << ctx->framebuffer.nr_cbufs) - 1; 1769 psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; 1770 uint32_t barycentricsMask = 0; 1771#if 0 1772 // when we switch to mesa-master 1773 if (ctx->fs->info.base.uses_persp_center || 1774 ctx->fs->info.base.uses_linear_center) 1775 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK; 1776 if (ctx->fs->info.base.uses_persp_centroid || 1777 ctx->fs->info.base.uses_linear_centroid) 1778 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK; 1779 if (ctx->fs->info.base.uses_persp_sample || 1780 ctx->fs->info.base.uses_linear_sample) 1781 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK; 1782#else 1783 for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) { 1784 switch (ctx->fs->info.base.input_interpolate_loc[i]) { 1785 case TGSI_INTERPOLATE_LOC_CENTER: 1786 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK; 1787 break; 1788 case TGSI_INTERPOLATE_LOC_CENTROID: 1789 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK; 1790 break; 1791 case TGSI_INTERPOLATE_LOC_SAMPLE: 1792 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK; 1793 break; 1794 } 1795 } 1796#endif 1797 psState.barycentricsMask = barycentricsMask; 1798 psState.usesUAV = false; // XXX 1799 psState.forceEarlyZ = false; 1800 ctx->api.pfnSwrSetPixelShaderState(ctx->swrContext, &psState); 1801 1802 /* JIT sampler state */ 1803 if (ctx->dirty & (SWR_NEW_SAMPLER | 1804 SWR_NEW_FS)) { 1805 swr_update_sampler_state(ctx, 1806 PIPE_SHADER_FRAGMENT, 1807 key.nr_samplers, 1808 ctx->swrDC.samplersFS); 1809 } 1810 1811 /* JIT sampler view state */ 1812 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | 1813 SWR_NEW_FRAMEBUFFER | 1814 SWR_NEW_FS)) { 1815 swr_update_texture_state(ctx, 1816 PIPE_SHADER_FRAGMENT, 1817 key.nr_sampler_views, 1818 ctx->swrDC.texturesFS); 1819 } 1820 } 1821 1822 1823 /* VertexShader Constants */ 1824 if (ctx->dirty & SWR_NEW_VSCONSTANTS) { 1825 swr_update_constants(ctx, PIPE_SHADER_VERTEX); 1826 } 1827 1828 /* FragmentShader Constants */ 1829 if (ctx->dirty & SWR_NEW_FSCONSTANTS) { 1830 swr_update_constants(ctx, PIPE_SHADER_FRAGMENT); 1831 } 1832 1833 /* GeometryShader Constants */ 1834 if (ctx->dirty & SWR_NEW_GSCONSTANTS) { 1835 swr_update_constants(ctx, PIPE_SHADER_GEOMETRY); 1836 } 1837 1838 /* Tessellation Control Shader Constants */ 1839 if (ctx->dirty & SWR_NEW_TCSCONSTANTS) { 1840 swr_update_constants(ctx, PIPE_SHADER_TESS_CTRL); 1841 } 1842 1843 /* Tessellation Evaluation Shader Constants */ 1844 if (ctx->dirty & SWR_NEW_TESCONSTANTS) { 1845 swr_update_constants(ctx, PIPE_SHADER_TESS_EVAL); 1846 } 1847 1848 /* Depth/stencil state */ 1849 if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) { 1850 struct pipe_depth_stencil_alpha_state *depth = ctx->depth_stencil; 1851 struct pipe_stencil_state *stencil = depth->stencil; 1852 SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}}; 1853 SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0}; 1854 1855 /* XXX, incomplete. Need to flesh out stencil & alpha test state 1856 struct pipe_stencil_state *front_stencil = 1857 ctx->depth_stencil.stencil[0]; 1858 struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1]; 1859 */ 1860 if (stencil[0].enabled) { 1861 depthStencilState.stencilWriteEnable = 1; 1862 depthStencilState.stencilTestEnable = 1; 1863 depthStencilState.stencilTestFunc = 1864 swr_convert_depth_func(stencil[0].func); 1865 1866 depthStencilState.stencilPassDepthPassOp = 1867 swr_convert_stencil_op(stencil[0].zpass_op); 1868 depthStencilState.stencilPassDepthFailOp = 1869 swr_convert_stencil_op(stencil[0].zfail_op); 1870 depthStencilState.stencilFailOp = 1871 swr_convert_stencil_op(stencil[0].fail_op); 1872 depthStencilState.stencilWriteMask = stencil[0].writemask; 1873 depthStencilState.stencilTestMask = stencil[0].valuemask; 1874 depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0]; 1875 } 1876 if (stencil[1].enabled) { 1877 depthStencilState.doubleSidedStencilTestEnable = 1; 1878 1879 depthStencilState.backfaceStencilTestFunc = 1880 swr_convert_depth_func(stencil[1].func); 1881 1882 depthStencilState.backfaceStencilPassDepthPassOp = 1883 swr_convert_stencil_op(stencil[1].zpass_op); 1884 depthStencilState.backfaceStencilPassDepthFailOp = 1885 swr_convert_stencil_op(stencil[1].zfail_op); 1886 depthStencilState.backfaceStencilFailOp = 1887 swr_convert_stencil_op(stencil[1].fail_op); 1888 depthStencilState.backfaceStencilWriteMask = stencil[1].writemask; 1889 depthStencilState.backfaceStencilTestMask = stencil[1].valuemask; 1890 1891 depthStencilState.backfaceStencilRefValue = 1892 ctx->stencil_ref.ref_value[1]; 1893 } 1894 1895 depthStencilState.depthTestEnable = depth->depth_enabled; 1896 depthStencilState.depthTestFunc = swr_convert_depth_func(depth->depth_func); 1897 depthStencilState.depthWriteEnable = depth->depth_writemask; 1898 ctx->api.pfnSwrSetDepthStencilState(ctx->swrContext, &depthStencilState); 1899 1900 depthBoundsState.depthBoundsTestEnable = depth->depth_bounds_test; 1901 depthBoundsState.depthBoundsTestMinValue = depth->depth_bounds_min; 1902 depthBoundsState.depthBoundsTestMaxValue = depth->depth_bounds_max; 1903 ctx->api.pfnSwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState); 1904 } 1905 1906 /* Blend State */ 1907 if (ctx->dirty & (SWR_NEW_BLEND | 1908 SWR_NEW_RASTERIZER | 1909 SWR_NEW_FRAMEBUFFER | 1910 SWR_NEW_DEPTH_STENCIL_ALPHA)) { 1911 struct pipe_framebuffer_state *fb = &ctx->framebuffer; 1912 1913 SWR_BLEND_STATE blendState; 1914 memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState)); 1915 blendState.constantColor[0] = ctx->blend_color.color[0]; 1916 blendState.constantColor[1] = ctx->blend_color.color[1]; 1917 blendState.constantColor[2] = ctx->blend_color.color[2]; 1918 blendState.constantColor[3] = ctx->blend_color.color[3]; 1919 blendState.alphaTestReference = 1920 *((uint32_t*)&ctx->depth_stencil->alpha_ref_value); 1921 1922 blendState.sampleMask = ctx->sample_mask; 1923 blendState.sampleCount = GetSampleCount(fb->samples); 1924 1925 /* If there are no color buffers bound, disable writes on RT0 1926 * and skip loop */ 1927 if (fb->nr_cbufs == 0) { 1928 blendState.renderTarget[0].writeDisableRed = 1; 1929 blendState.renderTarget[0].writeDisableGreen = 1; 1930 blendState.renderTarget[0].writeDisableBlue = 1; 1931 blendState.renderTarget[0].writeDisableAlpha = 1; 1932 ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, 0, NULL); 1933 } 1934 else 1935 for (int target = 0; 1936 target < std::min(SWR_NUM_RENDERTARGETS, 1937 PIPE_MAX_COLOR_BUFS); 1938 target++) { 1939 if (!fb->cbufs[target]) 1940 continue; 1941 1942 struct swr_resource *colorBuffer = 1943 swr_resource(fb->cbufs[target]->texture); 1944 1945 BLEND_COMPILE_STATE compileState; 1946 memset(&compileState, 0, sizeof(compileState)); 1947 compileState.format = colorBuffer->swr.format; 1948 memcpy(&compileState.blendState, 1949 &ctx->blend->compileState[target], 1950 sizeof(compileState.blendState)); 1951 1952 const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format); 1953 if (compileState.blendState.logicOpEnable && 1954 ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) { 1955 compileState.blendState.logicOpEnable = false; 1956 } 1957 1958 if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT) 1959 compileState.blendState.blendEnable = false; 1960 1961 if (compileState.blendState.blendEnable == false && 1962 compileState.blendState.logicOpEnable == false && 1963 ctx->depth_stencil->alpha_enabled == 0) { 1964 ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, NULL); 1965 continue; 1966 } 1967 1968 compileState.desc.alphaTestEnable = 1969 ctx->depth_stencil->alpha_enabled; 1970 compileState.desc.independentAlphaBlendEnable = 1971 (compileState.blendState.sourceBlendFactor != 1972 compileState.blendState.sourceAlphaBlendFactor) || 1973 (compileState.blendState.destBlendFactor != 1974 compileState.blendState.destAlphaBlendFactor) || 1975 (compileState.blendState.colorBlendFunc != 1976 compileState.blendState.alphaBlendFunc); 1977 compileState.desc.alphaToCoverageEnable = 1978 ctx->blend->pipe.alpha_to_coverage; 1979 compileState.desc.sampleMaskEnable = (blendState.sampleMask != 0); 1980 compileState.desc.numSamples = fb->samples; 1981 1982 compileState.alphaTestFunction = 1983 swr_convert_depth_func(ctx->depth_stencil->alpha_func); 1984 compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx 1985 1986 compileState.Canonicalize(); 1987 1988 PFN_BLEND_JIT_FUNC func = NULL; 1989 auto search = ctx->blendJIT->find(compileState); 1990 if (search != ctx->blendJIT->end()) { 1991 func = search->second; 1992 } else { 1993 HANDLE hJitMgr = screen->hJitMgr; 1994 func = JitCompileBlend(hJitMgr, compileState); 1995 debug_printf("BLEND shader %p\n", func); 1996 assert(func && "Error: BlendShader = NULL"); 1997 1998 ctx->blendJIT->insert(std::make_pair(compileState, func)); 1999 } 2000 ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, func); 2001 } 2002 2003 ctx->api.pfnSwrSetBlendState(ctx->swrContext, &blendState); 2004 } 2005 2006 if (ctx->dirty & SWR_NEW_STIPPLE) { 2007 swr_update_poly_stipple(ctx); 2008 } 2009 2010 if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_TCS | SWR_NEW_TES | SWR_NEW_SO | SWR_NEW_RASTERIZER)) { 2011 ctx->vs->soState.rasterizerDisable = 2012 ctx->rasterizer->rasterizer_discard; 2013 ctx->api.pfnSwrSetSoState(ctx->swrContext, &ctx->vs->soState); 2014 2015 pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output; 2016 2017 for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) { 2018 SWR_STREAMOUT_BUFFER buffer = {0}; 2019 if (ctx->so_targets[i]) { 2020 buffer.enable = true; 2021 buffer.pBuffer = 2022 (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) + 2023 ctx->so_targets[i]->buffer_offset); 2024 buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2; 2025 buffer.pitch = stream_output->stride[i]; 2026 buffer.streamOffset = 0; 2027 } 2028 2029 ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i); 2030 } 2031 } 2032 2033 2034 if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) { 2035 // shader exporting clip distances overrides all user clip planes 2036 if (ctx->rasterizer->clip_plane_enable && 2037 !swr_get_last_fe(ctx)->num_written_clipdistance) 2038 { 2039 swr_draw_context *pDC = &ctx->swrDC; 2040 memcpy(pDC->userClipPlanes, 2041 ctx->clip.ucp, 2042 sizeof(pDC->userClipPlanes)); 2043 } 2044 } 2045 2046 // set up backend state 2047 SWR_BACKEND_STATE backendState = {0}; 2048 if (ctx->gs) { 2049 backendState.numAttributes = ctx->gs->info.base.num_outputs - 1; 2050 } else 2051 if (ctx->tes) { 2052 backendState.numAttributes = ctx->tes->info.base.num_outputs - 1; 2053 // no case for TCS, because if TCS is active, TES must be active 2054 // as well - pipeline stages after tessellation does not support patches 2055 } else { 2056 backendState.numAttributes = ctx->vs->info.base.num_outputs - 1; 2057 if (ctx->fs->info.base.uses_primid) { 2058 backendState.numAttributes++; 2059 backendState.swizzleEnable = true; 2060 for (unsigned i = 0; i < sizeof(backendState.numComponents); i++) { 2061 backendState.swizzleMap[i].sourceAttrib = i; 2062 } 2063 backendState.swizzleMap[ctx->vs->info.base.num_outputs - 1].constantSource = 2064 SWR_CONSTANT_SOURCE_PRIM_ID; 2065 backendState.swizzleMap[ctx->vs->info.base.num_outputs - 1].componentOverrideMask = 1; 2066 } 2067 } 2068 if (ctx->rasterizer->sprite_coord_enable) 2069 backendState.numAttributes++; 2070 2071 backendState.numAttributes = std::min((size_t)backendState.numAttributes, 2072 sizeof(backendState.numComponents)); 2073 for (unsigned i = 0; i < backendState.numAttributes; i++) 2074 backendState.numComponents[i] = 4; 2075 backendState.constantInterpolationMask = ctx->fs->constantMask | 2076 (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0); 2077 backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask; 2078 2079 struct tgsi_shader_info *pLastFE = swr_get_last_fe(ctx); 2080 2081 backendState.readRenderTargetArrayIndex = pLastFE->writes_layer; 2082 backendState.readViewportArrayIndex = pLastFE->writes_viewport_index; 2083 backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize 2084 2085 backendState.clipDistanceMask = 2086 pLastFE->num_written_clipdistance ? 2087 pLastFE->clipdist_writemask & ctx->rasterizer->clip_plane_enable : 2088 ctx->rasterizer->clip_plane_enable; 2089 2090 backendState.cullDistanceMask = 2091 pLastFE->culldist_writemask << pLastFE->num_written_clipdistance; 2092 2093 // Assume old layout of SGV, POSITION, CLIPCULL, ATTRIB 2094 backendState.vertexClipCullOffset = backendState.vertexAttribOffset - 2; 2095 2096 ctx->api.pfnSwrSetBackendState(ctx->swrContext, &backendState); 2097 2098 /* Ensure that any in-progress attachment change StoreTiles finish */ 2099 if (swr_is_fence_pending(screen->flush_fence)) 2100 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0); 2101 2102 /* Finally, update the in-use status of all resources involved in draw */ 2103 swr_update_resource_status(pipe, p_draw_info); 2104 2105 ctx->dirty = post_update_dirty_flags; 2106} 2107 2108 2109static struct pipe_stream_output_target * 2110swr_create_so_target(struct pipe_context *pipe, 2111 struct pipe_resource *buffer, 2112 unsigned buffer_offset, 2113 unsigned buffer_size) 2114{ 2115 struct pipe_stream_output_target *target; 2116 2117 target = CALLOC_STRUCT(pipe_stream_output_target); 2118 if (!target) 2119 return NULL; 2120 2121 target->context = pipe; 2122 target->reference.count = 1; 2123 pipe_resource_reference(&target->buffer, buffer); 2124 target->buffer_offset = buffer_offset; 2125 target->buffer_size = buffer_size; 2126 return target; 2127} 2128 2129static void 2130swr_destroy_so_target(struct pipe_context *pipe, 2131 struct pipe_stream_output_target *target) 2132{ 2133 pipe_resource_reference(&target->buffer, NULL); 2134 FREE(target); 2135} 2136 2137static void 2138swr_set_so_targets(struct pipe_context *pipe, 2139 unsigned num_targets, 2140 struct pipe_stream_output_target **targets, 2141 const unsigned *offsets) 2142{ 2143 struct swr_context *swr = swr_context(pipe); 2144 uint32_t i; 2145 2146 assert(num_targets <= MAX_SO_STREAMS); 2147 2148 for (i = 0; i < num_targets; i++) { 2149 pipe_so_target_reference( 2150 (struct pipe_stream_output_target **)&swr->so_targets[i], 2151 targets[i]); 2152 } 2153 2154 for (/* fall-through */; i < swr->num_so_targets; i++) { 2155 pipe_so_target_reference( 2156 (struct pipe_stream_output_target **)&swr->so_targets[i], NULL); 2157 } 2158 2159 swr->num_so_targets = num_targets; 2160 swr->swrDC.soPrims = &swr->so_primCounter; 2161 2162 swr->dirty |= SWR_NEW_SO; 2163} 2164 2165static void 2166swr_set_patch_vertices(struct pipe_context *pipe, uint8_t patch_vertices) 2167{ 2168 struct swr_context *swr = swr_context(pipe); 2169 2170 swr->patch_vertices = patch_vertices; 2171} 2172 2173 2174void 2175swr_state_init(struct pipe_context *pipe) 2176{ 2177 pipe->create_blend_state = swr_create_blend_state; 2178 pipe->bind_blend_state = swr_bind_blend_state; 2179 pipe->delete_blend_state = swr_delete_blend_state; 2180 2181 pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state; 2182 pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state; 2183 pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state; 2184 2185 pipe->create_rasterizer_state = swr_create_rasterizer_state; 2186 pipe->bind_rasterizer_state = swr_bind_rasterizer_state; 2187 pipe->delete_rasterizer_state = swr_delete_rasterizer_state; 2188 2189 pipe->create_sampler_state = swr_create_sampler_state; 2190 pipe->bind_sampler_states = swr_bind_sampler_states; 2191 pipe->delete_sampler_state = swr_delete_sampler_state; 2192 2193 pipe->create_sampler_view = swr_create_sampler_view; 2194 pipe->set_sampler_views = swr_set_sampler_views; 2195 pipe->sampler_view_destroy = swr_sampler_view_destroy; 2196 2197 pipe->create_vs_state = swr_create_vs_state; 2198 pipe->bind_vs_state = swr_bind_vs_state; 2199 pipe->delete_vs_state = swr_delete_vs_state; 2200 2201 pipe->create_fs_state = swr_create_fs_state; 2202 pipe->bind_fs_state = swr_bind_fs_state; 2203 pipe->delete_fs_state = swr_delete_fs_state; 2204 2205 pipe->create_gs_state = swr_create_gs_state; 2206 pipe->bind_gs_state = swr_bind_gs_state; 2207 pipe->delete_gs_state = swr_delete_gs_state; 2208 2209 pipe->create_tcs_state = swr_create_tcs_state; 2210 pipe->bind_tcs_state = swr_bind_tcs_state; 2211 pipe->delete_tcs_state = swr_delete_tcs_state; 2212 2213 pipe->create_tes_state = swr_create_tes_state; 2214 pipe->bind_tes_state = swr_bind_tes_state; 2215 pipe->delete_tes_state = swr_delete_tes_state; 2216 2217 pipe->set_constant_buffer = swr_set_constant_buffer; 2218 2219 pipe->create_vertex_elements_state = swr_create_vertex_elements_state; 2220 pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state; 2221 pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state; 2222 2223 pipe->set_vertex_buffers = swr_set_vertex_buffers; 2224 2225 pipe->set_polygon_stipple = swr_set_polygon_stipple; 2226 pipe->set_clip_state = swr_set_clip_state; 2227 pipe->set_scissor_states = swr_set_scissor_states; 2228 pipe->set_viewport_states = swr_set_viewport_states; 2229 2230 pipe->set_framebuffer_state = swr_set_framebuffer_state; 2231 2232 pipe->set_blend_color = swr_set_blend_color; 2233 pipe->set_stencil_ref = swr_set_stencil_ref; 2234 2235 pipe->set_sample_mask = swr_set_sample_mask; 2236 pipe->get_sample_position = swr_get_sample_position; 2237 2238 pipe->create_stream_output_target = swr_create_so_target; 2239 pipe->stream_output_target_destroy = swr_destroy_so_target; 2240 pipe->set_stream_output_targets = swr_set_so_targets; 2241 2242 pipe->set_patch_vertices = swr_set_patch_vertices; 2243} 2244