1/* 2 * Copyright 2010 Red Hat Inc. 3 * 2010 Jerome Glisse 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: Dave Airlie <airlied@redhat.com> 25 * Jerome Glisse <jglisse@redhat.com> 26 */ 27#include "r600_formats.h" 28#include "r600_shader.h" 29#include "r600d.h" 30 31#include "util/format/u_format_s3tc.h" 32#include "util/u_draw.h" 33#include "util/u_index_modify.h" 34#include "util/u_memory.h" 35#include "util/u_upload_mgr.h" 36#include "util/u_math.h" 37#include "tgsi/tgsi_parse.h" 38#include "tgsi/tgsi_scan.h" 39#include "tgsi/tgsi_ureg.h" 40 41#include "nir.h" 42#include "nir/nir_to_tgsi_info.h" 43#include "tgsi/tgsi_from_mesa.h" 44 45void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw) 46{ 47 assert(!cb->buf); 48 cb->buf = CALLOC(1, 4 * num_dw); 49 cb->max_num_dw = num_dw; 50} 51 52void r600_release_command_buffer(struct r600_command_buffer *cb) 53{ 54 FREE(cb->buf); 55} 56 57void r600_add_atom(struct r600_context *rctx, 58 struct r600_atom *atom, 59 unsigned id) 60{ 61 assert(id < R600_NUM_ATOMS); 62 assert(rctx->atoms[id] == NULL); 63 rctx->atoms[id] = atom; 64 atom->id = id; 65} 66 67void r600_init_atom(struct r600_context *rctx, 68 struct r600_atom *atom, 69 unsigned id, 70 void (*emit)(struct r600_context *ctx, struct r600_atom *state), 71 unsigned num_dw) 72{ 73 atom->emit = (void*)emit; 74 atom->num_dw = num_dw; 75 r600_add_atom(rctx, atom, id); 76} 77 78void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom) 79{ 80 r600_emit_command_buffer(&rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); 81} 82 83void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom) 84{ 85 struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 86 struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom; 87 unsigned alpha_ref = a->sx_alpha_ref; 88 89 if (rctx->b.chip_class >= EVERGREEN && a->cb0_export_16bpc) { 90 alpha_ref &= ~0x1FFF; 91 } 92 93 radeon_set_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL, 94 a->sx_alpha_test_control | 95 S_028410_ALPHA_TEST_BYPASS(a->bypass)); 96 
radeon_set_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref); 97} 98 99static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) 100{ 101 struct r600_context *rctx = (struct r600_context *)ctx; 102 103 if (!(flags & ~PIPE_BARRIER_UPDATE)) 104 return; 105 106 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 107 rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE; 108 109 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 110 PIPE_BARRIER_SHADER_BUFFER | 111 PIPE_BARRIER_TEXTURE | 112 PIPE_BARRIER_IMAGE | 113 PIPE_BARRIER_STREAMOUT_BUFFER | 114 PIPE_BARRIER_GLOBAL_BUFFER)) { 115 rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE| 116 R600_CONTEXT_INV_TEX_CACHE; 117 } 118 119 if (flags & (PIPE_BARRIER_FRAMEBUFFER| 120 PIPE_BARRIER_IMAGE)) 121 rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV; 122 123 rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 124} 125 126static void r600_texture_barrier(struct pipe_context *ctx, unsigned flags) 127{ 128 struct r600_context *rctx = (struct r600_context *)ctx; 129 130 rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | 131 R600_CONTEXT_FLUSH_AND_INV_CB | 132 R600_CONTEXT_FLUSH_AND_INV | 133 R600_CONTEXT_WAIT_3D_IDLE; 134 rctx->framebuffer.do_update_surf_dirtiness = true; 135} 136 137static unsigned r600_conv_pipe_prim(unsigned prim) 138{ 139 static const unsigned prim_conv[] = { 140 [PIPE_PRIM_POINTS] = V_008958_DI_PT_POINTLIST, 141 [PIPE_PRIM_LINES] = V_008958_DI_PT_LINELIST, 142 [PIPE_PRIM_LINE_LOOP] = V_008958_DI_PT_LINELOOP, 143 [PIPE_PRIM_LINE_STRIP] = V_008958_DI_PT_LINESTRIP, 144 [PIPE_PRIM_TRIANGLES] = V_008958_DI_PT_TRILIST, 145 [PIPE_PRIM_TRIANGLE_STRIP] = V_008958_DI_PT_TRISTRIP, 146 [PIPE_PRIM_TRIANGLE_FAN] = V_008958_DI_PT_TRIFAN, 147 [PIPE_PRIM_QUADS] = V_008958_DI_PT_QUADLIST, 148 [PIPE_PRIM_QUAD_STRIP] = V_008958_DI_PT_QUADSTRIP, 149 [PIPE_PRIM_POLYGON] = V_008958_DI_PT_POLYGON, 150 [PIPE_PRIM_LINES_ADJACENCY] = V_008958_DI_PT_LINELIST_ADJ, 151 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_008958_DI_PT_LINESTRIP_ADJ, 152 
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_008958_DI_PT_TRILIST_ADJ, 153 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_008958_DI_PT_TRISTRIP_ADJ, 154 [PIPE_PRIM_PATCHES] = V_008958_DI_PT_PATCH, 155 [R600_PRIM_RECTANGLE_LIST] = V_008958_DI_PT_RECTLIST 156 }; 157 assert(prim < ARRAY_SIZE(prim_conv)); 158 return prim_conv[prim]; 159} 160 161unsigned r600_conv_prim_to_gs_out(unsigned mode) 162{ 163 static const int prim_conv[] = { 164 [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST, 165 [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, 166 [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, 167 [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, 168 [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 169 [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 170 [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 171 [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 172 [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 173 [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 174 [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, 175 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, 176 [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 177 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, 178 [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST, 179 [R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP 180 }; 181 assert(mode < ARRAY_SIZE(prim_conv)); 182 183 return prim_conv[mode]; 184} 185 186/* common state between evergreen and r600 */ 187 188static void r600_bind_blend_state_internal(struct r600_context *rctx, 189 struct r600_blend_state *blend, bool blend_disable) 190{ 191 unsigned color_control; 192 bool update_cb = false; 193 194 rctx->alpha_to_one = blend->alpha_to_one; 195 rctx->dual_src_blend = blend->dual_src_blend; 196 197 if (!blend_disable) { 198 r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer); 199 color_control = 
blend->cb_color_control; 200 } else { 201 /* Blending is disabled. */ 202 r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer_no_blend); 203 color_control = blend->cb_color_control_no_blend; 204 } 205 206 /* Update derived states. */ 207 if (rctx->cb_misc_state.blend_colormask != blend->cb_target_mask) { 208 rctx->cb_misc_state.blend_colormask = blend->cb_target_mask; 209 update_cb = true; 210 } 211 if (rctx->b.chip_class <= R700 && 212 rctx->cb_misc_state.cb_color_control != color_control) { 213 rctx->cb_misc_state.cb_color_control = color_control; 214 update_cb = true; 215 } 216 if (rctx->cb_misc_state.dual_src_blend != blend->dual_src_blend) { 217 rctx->cb_misc_state.dual_src_blend = blend->dual_src_blend; 218 update_cb = true; 219 } 220 if (update_cb) { 221 r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 222 } 223 if (rctx->framebuffer.dual_src_blend != blend->dual_src_blend) { 224 rctx->framebuffer.dual_src_blend = blend->dual_src_blend; 225 r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 226 } 227} 228 229static void r600_bind_blend_state(struct pipe_context *ctx, void *state) 230{ 231 struct r600_context *rctx = (struct r600_context *)ctx; 232 struct r600_blend_state *blend = (struct r600_blend_state *)state; 233 234 if (!blend) { 235 r600_set_cso_state_with_cb(rctx, &rctx->blend_state, NULL, NULL); 236 return; 237 } 238 239 r600_bind_blend_state_internal(rctx, blend, rctx->force_blend_disable); 240} 241 242static void r600_set_blend_color(struct pipe_context *ctx, 243 const struct pipe_blend_color *state) 244{ 245 struct r600_context *rctx = (struct r600_context *)ctx; 246 247 rctx->blend_color.state = *state; 248 r600_mark_atom_dirty(rctx, &rctx->blend_color.atom); 249} 250 251void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) 252{ 253 struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 254 struct pipe_blend_color *state = &rctx->blend_color.state; 255 256 radeon_set_context_reg_seq(cs, 
R_028414_CB_BLEND_RED, 4); 257 radeon_emit(cs, fui(state->color[0])); /* R_028414_CB_BLEND_RED */ 258 radeon_emit(cs, fui(state->color[1])); /* R_028418_CB_BLEND_GREEN */ 259 radeon_emit(cs, fui(state->color[2])); /* R_02841C_CB_BLEND_BLUE */ 260 radeon_emit(cs, fui(state->color[3])); /* R_028420_CB_BLEND_ALPHA */ 261} 262 263void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) 264{ 265 struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 266 struct r600_vgt_state *a = (struct r600_vgt_state *)atom; 267 268 radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en); 269 radeon_set_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2); 270 radeon_emit(cs, a->vgt_indx_offset); /* R_028408_VGT_INDX_OFFSET */ 271 radeon_emit(cs, a->vgt_multi_prim_ib_reset_indx); /* R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX */ 272 if (a->last_draw_was_indirect) { 273 a->last_draw_was_indirect = false; 274 radeon_set_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); 275 } 276} 277 278static void r600_set_clip_state(struct pipe_context *ctx, 279 const struct pipe_clip_state *state) 280{ 281 struct r600_context *rctx = (struct r600_context *)ctx; 282 283 rctx->clip_state.state = *state; 284 r600_mark_atom_dirty(rctx, &rctx->clip_state.atom); 285 rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true; 286} 287 288static void r600_set_stencil_ref(struct pipe_context *ctx, 289 const struct r600_stencil_ref state) 290{ 291 struct r600_context *rctx = (struct r600_context *)ctx; 292 293 rctx->stencil_ref.state = state; 294 r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom); 295} 296 297void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) 298{ 299 struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 300 struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom; 301 302 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 303 radeon_emit(cs, /* R_028430_DB_STENCILREFMASK */ 304 
S_028430_STENCILREF(a->state.ref_value[0]) | 305 S_028430_STENCILMASK(a->state.valuemask[0]) | 306 S_028430_STENCILWRITEMASK(a->state.writemask[0])); 307 radeon_emit(cs, /* R_028434_DB_STENCILREFMASK_BF */ 308 S_028434_STENCILREF_BF(a->state.ref_value[1]) | 309 S_028434_STENCILMASK_BF(a->state.valuemask[1]) | 310 S_028434_STENCILWRITEMASK_BF(a->state.writemask[1])); 311} 312 313static void r600_set_pipe_stencil_ref(struct pipe_context *ctx, 314 const struct pipe_stencil_ref state) 315{ 316 struct r600_context *rctx = (struct r600_context *)ctx; 317 struct r600_dsa_state *dsa = (struct r600_dsa_state*)rctx->dsa_state.cso; 318 struct r600_stencil_ref ref; 319 320 rctx->stencil_ref.pipe_state = state; 321 322 if (!dsa) 323 return; 324 325 ref.ref_value[0] = state.ref_value[0]; 326 ref.ref_value[1] = state.ref_value[1]; 327 ref.valuemask[0] = dsa->valuemask[0]; 328 ref.valuemask[1] = dsa->valuemask[1]; 329 ref.writemask[0] = dsa->writemask[0]; 330 ref.writemask[1] = dsa->writemask[1]; 331 332 r600_set_stencil_ref(ctx, ref); 333} 334 335static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) 336{ 337 struct r600_context *rctx = (struct r600_context *)ctx; 338 struct r600_dsa_state *dsa = state; 339 struct r600_stencil_ref ref; 340 341 if (!state) { 342 r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, NULL, NULL); 343 return; 344 } 345 346 r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, dsa, &dsa->buffer); 347 348 ref.ref_value[0] = rctx->stencil_ref.pipe_state.ref_value[0]; 349 ref.ref_value[1] = rctx->stencil_ref.pipe_state.ref_value[1]; 350 ref.valuemask[0] = dsa->valuemask[0]; 351 ref.valuemask[1] = dsa->valuemask[1]; 352 ref.writemask[0] = dsa->writemask[0]; 353 ref.writemask[1] = dsa->writemask[1]; 354 if (rctx->zwritemask != dsa->zwritemask) { 355 rctx->zwritemask = dsa->zwritemask; 356 if (rctx->b.chip_class >= EVERGREEN) { 357 /* work around some issue when not writing to zbuffer 358 * we are having lockup on evergreen so do not enable 359 * 
hyperz when not writing zbuffer 360 */ 361 r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 362 } 363 } 364 365 r600_set_stencil_ref(ctx, ref); 366 367 /* Update alphatest state. */ 368 if (rctx->alphatest_state.sx_alpha_test_control != dsa->sx_alpha_test_control || 369 rctx->alphatest_state.sx_alpha_ref != dsa->alpha_ref) { 370 rctx->alphatest_state.sx_alpha_test_control = dsa->sx_alpha_test_control; 371 rctx->alphatest_state.sx_alpha_ref = dsa->alpha_ref; 372 r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 373 } 374} 375 376static void r600_bind_rs_state(struct pipe_context *ctx, void *state) 377{ 378 struct r600_rasterizer_state *rs = (struct r600_rasterizer_state *)state; 379 struct r600_context *rctx = (struct r600_context *)ctx; 380 381 if (!state) 382 return; 383 384 rctx->rasterizer = rs; 385 386 r600_set_cso_state_with_cb(rctx, &rctx->rasterizer_state, rs, &rs->buffer); 387 388 if (rs->offset_enable && 389 (rs->offset_units != rctx->poly_offset_state.offset_units || 390 rs->offset_scale != rctx->poly_offset_state.offset_scale || 391 rs->offset_units_unscaled != rctx->poly_offset_state.offset_units_unscaled)) { 392 rctx->poly_offset_state.offset_units = rs->offset_units; 393 rctx->poly_offset_state.offset_scale = rs->offset_scale; 394 rctx->poly_offset_state.offset_units_unscaled = rs->offset_units_unscaled; 395 r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom); 396 } 397 398 /* Update clip_misc_state. */ 399 if (rctx->clip_misc_state.pa_cl_clip_cntl != rs->pa_cl_clip_cntl || 400 rctx->clip_misc_state.clip_plane_enable != rs->clip_plane_enable) { 401 rctx->clip_misc_state.pa_cl_clip_cntl = rs->pa_cl_clip_cntl; 402 rctx->clip_misc_state.clip_plane_enable = rs->clip_plane_enable; 403 r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom); 404 } 405 406 r600_viewport_set_rast_deps(&rctx->b, rs->scissor_enable, rs->clip_halfz); 407 408 /* Re-emit PA_SC_LINE_STIPPLE. 
*/ 409 rctx->last_primitive_type = -1; 410} 411 412static void r600_delete_rs_state(struct pipe_context *ctx, void *state) 413{ 414 struct r600_rasterizer_state *rs = (struct r600_rasterizer_state *)state; 415 416 r600_release_command_buffer(&rs->buffer); 417 FREE(rs); 418} 419 420static void r600_sampler_view_destroy(struct pipe_context *ctx, 421 struct pipe_sampler_view *state) 422{ 423 struct r600_pipe_sampler_view *view = (struct r600_pipe_sampler_view *)state; 424 425 if (view->tex_resource->gpu_address && 426 view->tex_resource->b.b.target == PIPE_BUFFER) 427 list_delinit(&view->list); 428 429 pipe_resource_reference(&state->texture, NULL); 430 FREE(view); 431} 432 433void r600_sampler_states_dirty(struct r600_context *rctx, 434 struct r600_sampler_states *state) 435{ 436 if (state->dirty_mask) { 437 if (state->dirty_mask & state->has_bordercolor_mask) { 438 rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 439 } 440 state->atom.num_dw = 441 util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 + 442 util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5; 443 r600_mark_atom_dirty(rctx, &state->atom); 444 } 445} 446 447static void r600_bind_sampler_states(struct pipe_context *pipe, 448 enum pipe_shader_type shader, 449 unsigned start, 450 unsigned count, void **states) 451{ 452 struct r600_context *rctx = (struct r600_context *)pipe; 453 struct r600_textures_info *dst = &rctx->samplers[shader]; 454 struct r600_pipe_sampler_state **rstates = (struct r600_pipe_sampler_state**)states; 455 int seamless_cube_map = -1; 456 unsigned i; 457 /* This sets 1-bit for states with index >= count. */ 458 uint32_t disable_mask = ~((1ull << count) - 1); 459 /* These are the new states set by this function. 
*/ 460 uint32_t new_mask = 0; 461 462 assert(start == 0); /* XXX fix below */ 463 464 if (!states) { 465 disable_mask = ~0u; 466 count = 0; 467 } 468 469 for (i = 0; i < count; i++) { 470 struct r600_pipe_sampler_state *rstate = rstates[i]; 471 472 if (rstate == dst->states.states[i]) { 473 continue; 474 } 475 476 if (rstate) { 477 if (rstate->border_color_use) { 478 dst->states.has_bordercolor_mask |= 1 << i; 479 } else { 480 dst->states.has_bordercolor_mask &= ~(1 << i); 481 } 482 seamless_cube_map = rstate->seamless_cube_map; 483 484 new_mask |= 1 << i; 485 } else { 486 disable_mask |= 1 << i; 487 } 488 } 489 490 memcpy(dst->states.states, rstates, sizeof(void*) * count); 491 memset(dst->states.states + count, 0, sizeof(void*) * (NUM_TEX_UNITS - count)); 492 493 dst->states.enabled_mask &= ~disable_mask; 494 dst->states.dirty_mask &= dst->states.enabled_mask; 495 dst->states.enabled_mask |= new_mask; 496 dst->states.dirty_mask |= new_mask; 497 dst->states.has_bordercolor_mask &= dst->states.enabled_mask; 498 499 r600_sampler_states_dirty(rctx, &dst->states); 500 501 /* Seamless cubemap state. 
*/ 502 if (rctx->b.chip_class <= R700 && 503 seamless_cube_map != -1 && 504 seamless_cube_map != rctx->seamless_cube_map.enabled) { 505 /* change in TA_CNTL_AUX need a pipeline flush */ 506 rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 507 rctx->seamless_cube_map.enabled = seamless_cube_map; 508 r600_mark_atom_dirty(rctx, &rctx->seamless_cube_map.atom); 509 } 510} 511 512static void r600_delete_sampler_state(struct pipe_context *ctx, void *state) 513{ 514 free(state); 515} 516 517static void r600_delete_blend_state(struct pipe_context *ctx, void *state) 518{ 519 struct r600_context *rctx = (struct r600_context *)ctx; 520 struct r600_blend_state *blend = (struct r600_blend_state*)state; 521 522 if (rctx->blend_state.cso == state) { 523 ctx->bind_blend_state(ctx, NULL); 524 } 525 526 r600_release_command_buffer(&blend->buffer); 527 r600_release_command_buffer(&blend->buffer_no_blend); 528 FREE(blend); 529} 530 531static void r600_delete_dsa_state(struct pipe_context *ctx, void *state) 532{ 533 struct r600_context *rctx = (struct r600_context *)ctx; 534 struct r600_dsa_state *dsa = (struct r600_dsa_state *)state; 535 536 if (rctx->dsa_state.cso == state) { 537 ctx->bind_depth_stencil_alpha_state(ctx, NULL); 538 } 539 540 r600_release_command_buffer(&dsa->buffer); 541 free(dsa); 542} 543 544static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) 545{ 546 struct r600_context *rctx = (struct r600_context *)ctx; 547 548 r600_set_cso_state(rctx, &rctx->vertex_fetch_shader, state); 549} 550 551static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state) 552{ 553 struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state; 554 if (shader) 555 r600_resource_reference(&shader->buffer, NULL); 556 FREE(shader); 557} 558 559void r600_vertex_buffers_dirty(struct r600_context *rctx) 560{ 561 if (rctx->vertex_buffer_state.dirty_mask) { 562 rctx->vertex_buffer_state.atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 
12 : 11) * 563 util_bitcount(rctx->vertex_buffer_state.dirty_mask); 564 r600_mark_atom_dirty(rctx, &rctx->vertex_buffer_state.atom); 565 } 566} 567 568static void r600_set_vertex_buffers(struct pipe_context *ctx, 569 unsigned start_slot, unsigned count, 570 unsigned unbind_num_trailing_slots, 571 bool take_ownership, 572 const struct pipe_vertex_buffer *input) 573{ 574 struct r600_context *rctx = (struct r600_context *)ctx; 575 struct r600_vertexbuf_state *state = &rctx->vertex_buffer_state; 576 struct pipe_vertex_buffer *vb = state->vb + start_slot; 577 unsigned i; 578 uint32_t disable_mask = 0; 579 /* These are the new buffers set by this function. */ 580 uint32_t new_buffer_mask = 0; 581 582 /* Set vertex buffers. */ 583 if (input) { 584 for (i = 0; i < count; i++) { 585 if ((input[i].buffer.resource != vb[i].buffer.resource) || 586 (vb[i].stride != input[i].stride) || 587 (vb[i].buffer_offset != input[i].buffer_offset) || 588 (vb[i].is_user_buffer != input[i].is_user_buffer)) { 589 if (input[i].buffer.resource) { 590 vb[i].stride = input[i].stride; 591 vb[i].buffer_offset = input[i].buffer_offset; 592 if (take_ownership) { 593 pipe_resource_reference(&vb[i].buffer.resource, NULL); 594 vb[i].buffer.resource = input[i].buffer.resource; 595 } else { 596 pipe_resource_reference(&vb[i].buffer.resource, 597 input[i].buffer.resource); 598 } 599 new_buffer_mask |= 1 << i; 600 r600_context_add_resource_size(ctx, input[i].buffer.resource); 601 } else { 602 pipe_resource_reference(&vb[i].buffer.resource, NULL); 603 disable_mask |= 1 << i; 604 } 605 } 606 } 607 } else { 608 for (i = 0; i < count; i++) { 609 pipe_resource_reference(&vb[i].buffer.resource, NULL); 610 } 611 disable_mask = ((1ull << count) - 1); 612 } 613 614 for (i = 0; i < unbind_num_trailing_slots; i++) { 615 pipe_resource_reference(&vb[count + i].buffer.resource, NULL); 616 } 617 disable_mask |= ((1ull << unbind_num_trailing_slots) - 1) << count; 618 619 disable_mask <<= start_slot; 620 new_buffer_mask <<= 
start_slot; 621 622 rctx->vertex_buffer_state.enabled_mask &= ~disable_mask; 623 rctx->vertex_buffer_state.dirty_mask &= rctx->vertex_buffer_state.enabled_mask; 624 rctx->vertex_buffer_state.enabled_mask |= new_buffer_mask; 625 rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask; 626 627 r600_vertex_buffers_dirty(rctx); 628} 629 630void r600_sampler_views_dirty(struct r600_context *rctx, 631 struct r600_samplerview_state *state) 632{ 633 if (state->dirty_mask) { 634 state->atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 14 : 13) * 635 util_bitcount(state->dirty_mask); 636 r600_mark_atom_dirty(rctx, &state->atom); 637 } 638} 639 640static void r600_set_sampler_views(struct pipe_context *pipe, 641 enum pipe_shader_type shader, 642 unsigned start, unsigned count, 643 unsigned unbind_num_trailing_slots, 644 bool take_ownership, 645 struct pipe_sampler_view **views) 646{ 647 struct r600_context *rctx = (struct r600_context *) pipe; 648 struct r600_textures_info *dst = &rctx->samplers[shader]; 649 struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views; 650 uint32_t dirty_sampler_states_mask = 0; 651 unsigned i; 652 /* This sets 1-bit for textures with index >= count. */ 653 uint32_t disable_mask = ~((1ull << count) - 1); 654 /* These are the new textures set by this function. */ 655 uint32_t new_mask = 0; 656 657 /* Set textures with index >= count to NULL. 
*/ 658 uint32_t remaining_mask; 659 660 assert(start == 0); /* XXX fix below */ 661 662 if (!views) { 663 disable_mask = ~0u; 664 count = 0; 665 } 666 667 remaining_mask = dst->views.enabled_mask & disable_mask; 668 669 while (remaining_mask) { 670 i = u_bit_scan(&remaining_mask); 671 assert(dst->views.views[i]); 672 673 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL); 674 } 675 676 for (i = 0; i < count; i++) { 677 if (rviews[i] == dst->views.views[i]) { 678 if (take_ownership) { 679 struct pipe_sampler_view *view = views[i]; 680 pipe_sampler_view_reference(&view, NULL); 681 } 682 continue; 683 } 684 685 if (rviews[i]) { 686 struct r600_texture *rtex = 687 (struct r600_texture*)rviews[i]->base.texture; 688 bool is_buffer = rviews[i]->base.texture->target == PIPE_BUFFER; 689 690 if (!is_buffer && rtex->db_compatible) { 691 dst->views.compressed_depthtex_mask |= 1 << i; 692 } else { 693 dst->views.compressed_depthtex_mask &= ~(1 << i); 694 } 695 696 /* Track compressed colorbuffers. */ 697 if (!is_buffer && rtex->cmask.size) { 698 dst->views.compressed_colortex_mask |= 1 << i; 699 } else { 700 dst->views.compressed_colortex_mask &= ~(1 << i); 701 } 702 703 /* Changing from array to non-arrays textures and vice versa requires 704 * updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. 
*/ 705 if (rctx->b.chip_class <= R700 && 706 (dst->states.enabled_mask & (1 << i)) && 707 (rviews[i]->base.texture->target == PIPE_TEXTURE_1D_ARRAY || 708 rviews[i]->base.texture->target == PIPE_TEXTURE_2D_ARRAY) != dst->is_array_sampler[i]) { 709 dirty_sampler_states_mask |= 1 << i; 710 } 711 712 if (take_ownership) { 713 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL); 714 dst->views.views[i] = (struct r600_pipe_sampler_view*)views[i]; 715 } else { 716 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]); 717 } 718 new_mask |= 1 << i; 719 r600_context_add_resource_size(pipe, views[i]->texture); 720 } else { 721 pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL); 722 disable_mask |= 1 << i; 723 } 724 } 725 726 dst->views.enabled_mask &= ~disable_mask; 727 dst->views.dirty_mask &= dst->views.enabled_mask; 728 dst->views.enabled_mask |= new_mask; 729 dst->views.dirty_mask |= new_mask; 730 dst->views.compressed_depthtex_mask &= dst->views.enabled_mask; 731 dst->views.compressed_colortex_mask &= dst->views.enabled_mask; 732 dst->views.dirty_buffer_constants = TRUE; 733 r600_sampler_views_dirty(rctx, &dst->views); 734 735 if (dirty_sampler_states_mask) { 736 dst->states.dirty_mask |= dirty_sampler_states_mask; 737 r600_sampler_states_dirty(rctx, &dst->states); 738 } 739} 740 741static void r600_update_compressed_colortex_mask(struct r600_samplerview_state *views) 742{ 743 uint32_t mask = views->enabled_mask; 744 745 while (mask) { 746 unsigned i = u_bit_scan(&mask); 747 struct pipe_resource *res = views->views[i]->base.texture; 748 749 if (res && res->target != PIPE_BUFFER) { 750 struct r600_texture *rtex = (struct r600_texture *)res; 751 752 if (rtex->cmask.size) { 753 views->compressed_colortex_mask |= 1 << i; 754 } else { 755 views->compressed_colortex_mask &= ~(1 << i); 756 } 757 } 758 } 759} 760 761static int r600_get_hw_atomic_count(const struct pipe_context 
*ctx, 762 enum pipe_shader_type shader) 763{ 764 const struct r600_context *rctx = (struct r600_context *)ctx; 765 int value = 0; 766 switch (shader) { 767 case PIPE_SHADER_FRAGMENT: 768 case PIPE_SHADER_COMPUTE: 769 default: 770 break; 771 case PIPE_SHADER_VERTEX: 772 value = rctx->ps_shader->info.file_count[TGSI_FILE_HW_ATOMIC]; 773 break; 774 case PIPE_SHADER_GEOMETRY: 775 value = rctx->ps_shader->info.file_count[TGSI_FILE_HW_ATOMIC] + 776 rctx->vs_shader->info.file_count[TGSI_FILE_HW_ATOMIC]; 777 break; 778 case PIPE_SHADER_TESS_EVAL: 779 value = rctx->ps_shader->info.file_count[TGSI_FILE_HW_ATOMIC] + 780 rctx->vs_shader->info.file_count[TGSI_FILE_HW_ATOMIC] + 781 (rctx->gs_shader ? rctx->gs_shader->info.file_count[TGSI_FILE_HW_ATOMIC] : 0); 782 break; 783 case PIPE_SHADER_TESS_CTRL: 784 value = rctx->ps_shader->info.file_count[TGSI_FILE_HW_ATOMIC] + 785 rctx->vs_shader->info.file_count[TGSI_FILE_HW_ATOMIC] + 786 (rctx->gs_shader ? rctx->gs_shader->info.file_count[TGSI_FILE_HW_ATOMIC] : 0) + 787 rctx->tes_shader->info.file_count[TGSI_FILE_HW_ATOMIC]; 788 break; 789 } 790 return value; 791} 792 793static void r600_update_compressed_colortex_mask_images(struct r600_image_state *images) 794{ 795 uint32_t mask = images->enabled_mask; 796 797 while (mask) { 798 unsigned i = u_bit_scan(&mask); 799 struct pipe_resource *res = images->views[i].base.resource; 800 801 if (res && res->target != PIPE_BUFFER) { 802 struct r600_texture *rtex = (struct r600_texture *)res; 803 804 if (rtex->cmask.size) { 805 images->compressed_colortex_mask |= 1 << i; 806 } else { 807 images->compressed_colortex_mask &= ~(1 << i); 808 } 809 } 810 } 811} 812 813/* Compute the key for the hw shader variant */ 814static inline void r600_shader_selector_key(const struct pipe_context *ctx, 815 const struct r600_pipe_shader_selector *sel, 816 union r600_shader_key *key) 817{ 818 const struct r600_context *rctx = (struct r600_context *)ctx; 819 memset(key, 0, sizeof(*key)); 820 821 switch (sel->type) 
{ 822 case PIPE_SHADER_VERTEX: { 823 key->vs.as_ls = (rctx->tes_shader != NULL); 824 if (!key->vs.as_ls) 825 key->vs.as_es = (rctx->gs_shader != NULL); 826 827 if (rctx->ps_shader->current->shader.gs_prim_id_input && !rctx->gs_shader) { 828 key->vs.as_gs_a = true; 829 key->vs.prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid; 830 } 831 key->vs.first_atomic_counter = r600_get_hw_atomic_count(ctx, PIPE_SHADER_VERTEX); 832 break; 833 } 834 case PIPE_SHADER_GEOMETRY: 835 key->gs.first_atomic_counter = r600_get_hw_atomic_count(ctx, PIPE_SHADER_GEOMETRY); 836 key->gs.tri_strip_adj_fix = rctx->gs_tri_strip_adj_fix; 837 break; 838 case PIPE_SHADER_FRAGMENT: { 839 if (rctx->ps_shader->info.images_declared) 840 key->ps.image_size_const_offset = util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.enabled_mask); 841 key->ps.first_atomic_counter = r600_get_hw_atomic_count(ctx, PIPE_SHADER_FRAGMENT); 842 key->ps.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side; 843 key->ps.alpha_to_one = rctx->alpha_to_one && 844 rctx->rasterizer && rctx->rasterizer->multisample_enable && 845 !rctx->framebuffer.cb0_is_integer; 846 key->ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs; 847 key->ps.apply_sample_id_mask = (rctx->ps_iter_samples > 1) || !rctx->rasterizer->multisample_enable; 848 /* Dual-source blending only makes sense with nr_cbufs == 1. 
*/ 849 if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend) { 850 key->ps.nr_cbufs = 2; 851 key->ps.dual_source_blend = 1; 852 } 853 break; 854 } 855 case PIPE_SHADER_TESS_EVAL: 856 key->tes.as_es = (rctx->gs_shader != NULL); 857 key->tes.first_atomic_counter = r600_get_hw_atomic_count(ctx, PIPE_SHADER_TESS_EVAL); 858 break; 859 case PIPE_SHADER_TESS_CTRL: 860 key->tcs.prim_mode = rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; 861 key->tcs.first_atomic_counter = r600_get_hw_atomic_count(ctx, PIPE_SHADER_TESS_CTRL); 862 break; 863 case PIPE_SHADER_COMPUTE: 864 break; 865 default: 866 assert(0); 867 } 868} 869 870/* Select the hw shader variant depending on the current state. 871 * (*dirty) is set to 1 if current variant was changed */ 872int r600_shader_select(struct pipe_context *ctx, 873 struct r600_pipe_shader_selector* sel, 874 bool *dirty) 875{ 876 union r600_shader_key key; 877 struct r600_pipe_shader * shader = NULL; 878 int r; 879 880 r600_shader_selector_key(ctx, sel, &key); 881 882 /* Check if we don't need to change anything. 883 * This path is also used for most shaders that don't need multiple 884 * variants, it will cost just a computation of the key and this 885 * test. 
*/ 886 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) { 887 return 0; 888 } 889 890 /* lookup if we have other variants in the list */ 891 if (sel->num_shaders > 1) { 892 struct r600_pipe_shader *p = sel->current, *c = p->next_variant; 893 894 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) { 895 p = c; 896 c = c->next_variant; 897 } 898 899 if (c) { 900 p->next_variant = c->next_variant; 901 shader = c; 902 } 903 } 904 905 if (unlikely(!shader)) { 906 shader = CALLOC(1, sizeof(struct r600_pipe_shader)); 907 shader->selector = sel; 908 909 r = r600_pipe_shader_create(ctx, shader, key); 910 if (unlikely(r)) { 911 R600_ERR("Failed to build shader variant (type=%u) %d\n", 912 sel->type, r); 913 sel->current = NULL; 914 FREE(shader); 915 return r; 916 } 917 918 /* We don't know the value of nr_ps_max_color_exports until we built 919 * at least one variant, so we may need to recompute the key after 920 * building first variant. */ 921 if (sel->type == PIPE_SHADER_FRAGMENT && 922 sel->num_shaders == 0) { 923 sel->nr_ps_max_color_exports = shader->shader.nr_ps_max_color_exports; 924 r600_shader_selector_key(ctx, sel, &key); 925 } 926 927 memcpy(&shader->key, &key, sizeof(key)); 928 sel->num_shaders++; 929 } 930 931 if (dirty) 932 *dirty = true; 933 934 shader->next_variant = sel->current; 935 sel->current = shader; 936 937 return 0; 938} 939 940struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx, 941 const void *prog, enum pipe_shader_ir ir, 942 unsigned pipe_shader_type) 943{ 944 struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector); 945 946 sel->type = pipe_shader_type; 947 if (ir == PIPE_SHADER_IR_TGSI) { 948 sel->tokens = tgsi_dup_tokens((const struct tgsi_token *)prog); 949 tgsi_scan_shader(sel->tokens, &sel->info); 950 } else if (ir == PIPE_SHADER_IR_NIR){ 951 sel->nir = nir_shader_clone(NULL, (const nir_shader *)prog); 952 nir_tgsi_scan_shader(sel->nir, 
&sel->info, true); 953 } 954 return sel; 955} 956 957static void *r600_create_shader_state(struct pipe_context *ctx, 958 const struct pipe_shader_state *state, 959 unsigned pipe_shader_type) 960{ 961 int i; 962 struct r600_pipe_shader_selector *sel; 963 964 if (state->type == PIPE_SHADER_IR_TGSI) 965 sel = r600_create_shader_state_tokens(ctx, state->tokens, state->type, pipe_shader_type); 966 else if (state->type == PIPE_SHADER_IR_NIR) { 967 sel = r600_create_shader_state_tokens(ctx, state->ir.nir, state->type, pipe_shader_type); 968 } else 969 assert(0 && "Unknown shader type\n"); 970 971 sel->ir_type = state->type; 972 sel->so = state->stream_output; 973 974 switch (pipe_shader_type) { 975 case PIPE_SHADER_GEOMETRY: 976 sel->gs_output_prim = 977 sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; 978 sel->gs_max_out_vertices = 979 sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; 980 sel->gs_num_invocations = 981 sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; 982 break; 983 case PIPE_SHADER_VERTEX: 984 case PIPE_SHADER_TESS_CTRL: 985 sel->lds_patch_outputs_written_mask = 0; 986 sel->lds_outputs_written_mask = 0; 987 988 for (i = 0; i < sel->info.num_outputs; i++) { 989 unsigned name = sel->info.output_semantic_name[i]; 990 unsigned index = sel->info.output_semantic_index[i]; 991 992 switch (name) { 993 case TGSI_SEMANTIC_TESSINNER: 994 case TGSI_SEMANTIC_TESSOUTER: 995 case TGSI_SEMANTIC_PATCH: 996 sel->lds_patch_outputs_written_mask |= 997 1ull << r600_get_lds_unique_index(name, index); 998 break; 999 default: 1000 sel->lds_outputs_written_mask |= 1001 1ull << r600_get_lds_unique_index(name, index); 1002 } 1003 } 1004 break; 1005 default: 1006 break; 1007 } 1008 1009 return sel; 1010} 1011 1012static void *r600_create_ps_state(struct pipe_context *ctx, 1013 const struct pipe_shader_state *state) 1014{ 1015 return r600_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT); 1016} 1017 1018static void *r600_create_vs_state(struct pipe_context *ctx, 
1019 const struct pipe_shader_state *state) 1020{ 1021 return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); 1022} 1023 1024static void *r600_create_gs_state(struct pipe_context *ctx, 1025 const struct pipe_shader_state *state) 1026{ 1027 return r600_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY); 1028} 1029 1030static void *r600_create_tcs_state(struct pipe_context *ctx, 1031 const struct pipe_shader_state *state) 1032{ 1033 return r600_create_shader_state(ctx, state, PIPE_SHADER_TESS_CTRL); 1034} 1035 1036static void *r600_create_tes_state(struct pipe_context *ctx, 1037 const struct pipe_shader_state *state) 1038{ 1039 return r600_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL); 1040} 1041 1042static void r600_bind_ps_state(struct pipe_context *ctx, void *state) 1043{ 1044 struct r600_context *rctx = (struct r600_context *)ctx; 1045 1046 if (!state) 1047 state = rctx->dummy_pixel_shader; 1048 1049 rctx->ps_shader = (struct r600_pipe_shader_selector *)state; 1050} 1051 1052static struct tgsi_shader_info *r600_get_vs_info(struct r600_context *rctx) 1053{ 1054 if (rctx->gs_shader) 1055 return &rctx->gs_shader->info; 1056 else if (rctx->tes_shader) 1057 return &rctx->tes_shader->info; 1058 else if (rctx->vs_shader) 1059 return &rctx->vs_shader->info; 1060 else 1061 return NULL; 1062} 1063 1064static void r600_bind_vs_state(struct pipe_context *ctx, void *state) 1065{ 1066 struct r600_context *rctx = (struct r600_context *)ctx; 1067 1068 if (!state || rctx->vs_shader == state) 1069 return; 1070 1071 rctx->vs_shader = (struct r600_pipe_shader_selector *)state; 1072 r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx)); 1073 1074 if (rctx->vs_shader->so.num_outputs) 1075 rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride; 1076} 1077 1078static void r600_bind_gs_state(struct pipe_context *ctx, void *state) 1079{ 1080 struct r600_context *rctx = (struct r600_context *)ctx; 1081 1082 if (state == rctx->gs_shader) 1083 
return; 1084 1085 rctx->gs_shader = (struct r600_pipe_shader_selector *)state; 1086 r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx)); 1087 1088 if (!state) 1089 return; 1090 1091 if (rctx->gs_shader->so.num_outputs) 1092 rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride; 1093} 1094 1095static void r600_bind_tcs_state(struct pipe_context *ctx, void *state) 1096{ 1097 struct r600_context *rctx = (struct r600_context *)ctx; 1098 1099 rctx->tcs_shader = (struct r600_pipe_shader_selector *)state; 1100} 1101 1102static void r600_bind_tes_state(struct pipe_context *ctx, void *state) 1103{ 1104 struct r600_context *rctx = (struct r600_context *)ctx; 1105 1106 if (state == rctx->tes_shader) 1107 return; 1108 1109 rctx->tes_shader = (struct r600_pipe_shader_selector *)state; 1110 r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx)); 1111 1112 if (!state) 1113 return; 1114 1115 if (rctx->tes_shader->so.num_outputs) 1116 rctx->b.streamout.stride_in_dw = rctx->tes_shader->so.stride; 1117} 1118 1119void r600_delete_shader_selector(struct pipe_context *ctx, 1120 struct r600_pipe_shader_selector *sel) 1121{ 1122 struct r600_pipe_shader *p = sel->current, *c; 1123 while (p) { 1124 c = p->next_variant; 1125 r600_pipe_shader_destroy(ctx, p); 1126 free(p); 1127 p = c; 1128 } 1129 1130 if (sel->ir_type == PIPE_SHADER_IR_TGSI) { 1131 free(sel->tokens); 1132 /* We might have converted the TGSI shader to a NIR shader */ 1133 if (sel->nir) 1134 ralloc_free(sel->nir); 1135 } 1136 else if (sel->ir_type == PIPE_SHADER_IR_NIR) 1137 ralloc_free(sel->nir); 1138 free(sel); 1139} 1140 1141 1142static void r600_delete_ps_state(struct pipe_context *ctx, void *state) 1143{ 1144 struct r600_context *rctx = (struct r600_context *)ctx; 1145 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state; 1146 1147 if (rctx->ps_shader == sel) { 1148 rctx->ps_shader = NULL; 1149 } 1150 1151 r600_delete_shader_selector(ctx, sel); 1152} 
1153 1154static void r600_delete_vs_state(struct pipe_context *ctx, void *state) 1155{ 1156 struct r600_context *rctx = (struct r600_context *)ctx; 1157 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state; 1158 1159 if (rctx->vs_shader == sel) { 1160 rctx->vs_shader = NULL; 1161 } 1162 1163 r600_delete_shader_selector(ctx, sel); 1164} 1165 1166 1167static void r600_delete_gs_state(struct pipe_context *ctx, void *state) 1168{ 1169 struct r600_context *rctx = (struct r600_context *)ctx; 1170 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state; 1171 1172 if (rctx->gs_shader == sel) { 1173 rctx->gs_shader = NULL; 1174 } 1175 1176 r600_delete_shader_selector(ctx, sel); 1177} 1178 1179static void r600_delete_tcs_state(struct pipe_context *ctx, void *state) 1180{ 1181 struct r600_context *rctx = (struct r600_context *)ctx; 1182 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state; 1183 1184 if (rctx->tcs_shader == sel) { 1185 rctx->tcs_shader = NULL; 1186 } 1187 1188 r600_delete_shader_selector(ctx, sel); 1189} 1190 1191static void r600_delete_tes_state(struct pipe_context *ctx, void *state) 1192{ 1193 struct r600_context *rctx = (struct r600_context *)ctx; 1194 struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state; 1195 1196 if (rctx->tes_shader == sel) { 1197 rctx->tes_shader = NULL; 1198 } 1199 1200 r600_delete_shader_selector(ctx, sel); 1201} 1202 1203void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state) 1204{ 1205 if (state->dirty_mask) { 1206 state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? 
util_bitcount(state->dirty_mask)*20 1207 : util_bitcount(state->dirty_mask)*19; 1208 r600_mark_atom_dirty(rctx, &state->atom); 1209 } 1210} 1211 1212static void r600_set_constant_buffer(struct pipe_context *ctx, 1213 enum pipe_shader_type shader, uint index, 1214 bool take_ownership, 1215 const struct pipe_constant_buffer *input) 1216{ 1217 struct r600_context *rctx = (struct r600_context *)ctx; 1218 struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; 1219 struct pipe_constant_buffer *cb; 1220 const uint8_t *ptr; 1221 1222 /* Note that the gallium frontend can unbind constant buffers by 1223 * passing NULL here. 1224 */ 1225 if (unlikely(!input || (!input->buffer && !input->user_buffer))) { 1226 state->enabled_mask &= ~(1 << index); 1227 state->dirty_mask &= ~(1 << index); 1228 pipe_resource_reference(&state->cb[index].buffer, NULL); 1229 return; 1230 } 1231 1232 cb = &state->cb[index]; 1233 cb->buffer_size = input->buffer_size; 1234 1235 ptr = input->user_buffer; 1236 1237 if (ptr) { 1238 /* Upload the user buffer. */ 1239 if (R600_BIG_ENDIAN) { 1240 uint32_t *tmpPtr; 1241 unsigned i, size = input->buffer_size; 1242 1243 if (!(tmpPtr = malloc(size))) { 1244 R600_ERR("Failed to allocate BE swap buffer.\n"); 1245 return; 1246 } 1247 1248 for (i = 0; i < size / 4; ++i) { 1249 tmpPtr[i] = util_cpu_to_le32(((uint32_t *)ptr)[i]); 1250 } 1251 1252 u_upload_data(ctx->stream_uploader, 0, size, 256, 1253 tmpPtr, &cb->buffer_offset, &cb->buffer); 1254 free(tmpPtr); 1255 } else { 1256 u_upload_data(ctx->stream_uploader, 0, 1257 input->buffer_size, 256, ptr, 1258 &cb->buffer_offset, &cb->buffer); 1259 } 1260 /* account it in gtt */ 1261 rctx->b.gtt += input->buffer_size; 1262 } else { 1263 /* Setup the hw buffer. 
*/ 1264 cb->buffer_offset = input->buffer_offset; 1265 if (take_ownership) { 1266 pipe_resource_reference(&cb->buffer, NULL); 1267 cb->buffer = input->buffer; 1268 } else { 1269 pipe_resource_reference(&cb->buffer, input->buffer); 1270 } 1271 r600_context_add_resource_size(ctx, input->buffer); 1272 } 1273 1274 state->enabled_mask |= 1 << index; 1275 state->dirty_mask |= 1 << index; 1276 r600_constant_buffers_dirty(rctx, state); 1277} 1278 1279static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) 1280{ 1281 struct r600_context *rctx = (struct r600_context*)pipe; 1282 1283 if (rctx->sample_mask.sample_mask == (uint16_t)sample_mask) 1284 return; 1285 1286 rctx->sample_mask.sample_mask = sample_mask; 1287 r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom); 1288} 1289 1290void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_only) 1291{ 1292 int sh, size; 1293 void *ptr; 1294 struct pipe_constant_buffer cb; 1295 int start, end; 1296 1297 start = compute_only ? PIPE_SHADER_COMPUTE : 0; 1298 end = compute_only ? 
PIPE_SHADER_TYPES : PIPE_SHADER_COMPUTE; 1299 1300 for (sh = start; sh < end; sh++) { 1301 struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh]; 1302 if (!info->vs_ucp_dirty && 1303 !info->texture_const_dirty && 1304 !info->ps_sample_pos_dirty && 1305 !info->tcs_default_levels_dirty && 1306 !info->cs_block_grid_size_dirty) 1307 continue; 1308 1309 ptr = info->constants; 1310 size = info->alloc_size; 1311 if (info->vs_ucp_dirty) { 1312 assert(sh == PIPE_SHADER_VERTEX); 1313 if (!size) { 1314 ptr = rctx->clip_state.state.ucp; 1315 size = R600_UCP_SIZE; 1316 } else { 1317 memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE); 1318 } 1319 info->vs_ucp_dirty = false; 1320 } 1321 1322 else if (info->ps_sample_pos_dirty) { 1323 assert(sh == PIPE_SHADER_FRAGMENT); 1324 if (!size) { 1325 ptr = rctx->sample_positions; 1326 size = R600_UCP_SIZE; 1327 } else { 1328 memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE); 1329 } 1330 info->ps_sample_pos_dirty = false; 1331 } 1332 1333 else if (info->cs_block_grid_size_dirty) { 1334 assert(sh == PIPE_SHADER_COMPUTE); 1335 if (!size) { 1336 ptr = rctx->cs_block_grid_sizes; 1337 size = R600_CS_BLOCK_GRID_SIZE; 1338 } else { 1339 memcpy(ptr, rctx->cs_block_grid_sizes, R600_CS_BLOCK_GRID_SIZE); 1340 } 1341 info->cs_block_grid_size_dirty = false; 1342 } 1343 1344 else if (info->tcs_default_levels_dirty) { 1345 /* 1346 * We'd only really need this for default tcs shader. 
1347 */ 1348 assert(sh == PIPE_SHADER_TESS_CTRL); 1349 if (!size) { 1350 ptr = rctx->tess_state; 1351 size = R600_TCS_DEFAULT_LEVELS_SIZE; 1352 } else { 1353 memcpy(ptr, rctx->tess_state, R600_TCS_DEFAULT_LEVELS_SIZE); 1354 } 1355 info->tcs_default_levels_dirty = false; 1356 } 1357 1358 if (info->texture_const_dirty) { 1359 assert (ptr); 1360 assert (size); 1361 if (sh == PIPE_SHADER_VERTEX) 1362 memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE); 1363 if (sh == PIPE_SHADER_FRAGMENT) 1364 memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE); 1365 if (sh == PIPE_SHADER_COMPUTE) 1366 memcpy(ptr, rctx->cs_block_grid_sizes, R600_CS_BLOCK_GRID_SIZE); 1367 if (sh == PIPE_SHADER_TESS_CTRL) 1368 memcpy(ptr, rctx->tess_state, R600_TCS_DEFAULT_LEVELS_SIZE); 1369 } 1370 info->texture_const_dirty = false; 1371 1372 cb.buffer = NULL; 1373 cb.user_buffer = ptr; 1374 cb.buffer_offset = 0; 1375 cb.buffer_size = size; 1376 rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, false, &cb); 1377 pipe_resource_reference(&cb.buffer, NULL); 1378 } 1379} 1380 1381static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type, 1382 unsigned array_size, uint32_t *base_offset) 1383{ 1384 struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type]; 1385 if (array_size + R600_UCP_SIZE > info->alloc_size) { 1386 info->constants = realloc(info->constants, array_size + R600_UCP_SIZE); 1387 info->alloc_size = array_size + R600_UCP_SIZE; 1388 } 1389 memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size); 1390 info->texture_const_dirty = true; 1391 *base_offset = R600_UCP_SIZE; 1392 return info->constants; 1393} 1394/* 1395 * On r600/700 hw we don't have vertex fetch swizzle, though TBO 1396 * doesn't require full swizzles it does need masking and setting alpha 1397 * to one, so we setup a set of 5 constants with the masks + alpha value 1398 * then in the shader, we AND the 4 components with 0xffffffff or 0, 1399 * then OR the 
alpha with the value given here. 1400 * We use a 6th constant to store the txq buffer size in 1401 * we use 7th slot for number of cube layers in a cube map array. 1402 */ 1403static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type) 1404{ 1405 struct r600_textures_info *samplers = &rctx->samplers[shader_type]; 1406 int bits; 1407 uint32_t array_size; 1408 int i, j; 1409 uint32_t *constants; 1410 uint32_t base_offset; 1411 if (!samplers->views.dirty_buffer_constants) 1412 return; 1413 1414 samplers->views.dirty_buffer_constants = FALSE; 1415 1416 bits = util_last_bit(samplers->views.enabled_mask); 1417 array_size = bits * 8 * sizeof(uint32_t); 1418 1419 constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset); 1420 1421 for (i = 0; i < bits; i++) { 1422 if (samplers->views.enabled_mask & (1 << i)) { 1423 int offset = (base_offset / 4) + i * 8; 1424 const struct util_format_description *desc; 1425 desc = util_format_description(samplers->views.views[i]->base.format); 1426 1427 for (j = 0; j < 4; j++) 1428 if (j < desc->nr_channels) 1429 constants[offset+j] = 0xffffffff; 1430 else 1431 constants[offset+j] = 0x0; 1432 if (desc->nr_channels < 4) { 1433 if (desc->channel[0].pure_integer) 1434 constants[offset+4] = 1; 1435 else 1436 constants[offset+4] = fui(1.0); 1437 } else 1438 constants[offset + 4] = 0; 1439 1440 constants[offset + 5] = samplers->views.views[i]->base.u.buf.size / 1441 util_format_get_blocksize(samplers->views.views[i]->base.format); 1442 constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6; 1443 } 1444 } 1445 1446} 1447 1448/* On evergreen we store one value 1449 * 1. number of cube layers in a cube map array. 
1450 */ 1451void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type) 1452{ 1453 struct r600_textures_info *samplers = &rctx->samplers[shader_type]; 1454 struct r600_image_state *images = NULL; 1455 int bits, sview_bits, img_bits; 1456 uint32_t array_size; 1457 int i; 1458 uint32_t *constants; 1459 uint32_t base_offset; 1460 1461 if (shader_type == PIPE_SHADER_FRAGMENT) { 1462 images = &rctx->fragment_images; 1463 } else if (shader_type == PIPE_SHADER_COMPUTE) { 1464 images = &rctx->compute_images; 1465 } 1466 1467 if (!samplers->views.dirty_buffer_constants && 1468 !(images && images->dirty_buffer_constants)) 1469 return; 1470 1471 if (images) 1472 images->dirty_buffer_constants = FALSE; 1473 samplers->views.dirty_buffer_constants = FALSE; 1474 1475 bits = sview_bits = util_last_bit(samplers->views.enabled_mask); 1476 if (images) 1477 bits += util_last_bit(images->enabled_mask); 1478 img_bits = bits; 1479 1480 array_size = bits * sizeof(uint32_t); 1481 1482 constants = r600_alloc_buf_consts(rctx, shader_type, array_size, 1483 &base_offset); 1484 1485 for (i = 0; i < sview_bits; i++) { 1486 if (samplers->views.enabled_mask & (1 << i)) { 1487 uint32_t offset = (base_offset / 4) + i; 1488 constants[offset] = samplers->views.views[i]->base.texture->array_size / 6; 1489 } 1490 } 1491 if (images) { 1492 for (i = sview_bits; i < img_bits; i++) { 1493 int idx = i - sview_bits; 1494 if (images->enabled_mask & (1 << idx)) { 1495 uint32_t offset = (base_offset / 4) + i; 1496 constants[offset] = images->views[idx].base.resource->array_size / 6; 1497 } 1498 } 1499 } 1500} 1501 1502/* set sample xy locations as array of fragment shader constants */ 1503void r600_set_sample_locations_constant_buffer(struct r600_context *rctx) 1504{ 1505 struct pipe_context *ctx = &rctx->b.b; 1506 1507 assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE); 1508 assert(rctx->framebuffer.nr_samples <= ARRAY_SIZE(rctx->sample_positions)/4); 1509 1510 memset(rctx->sample_positions, 0, 
4 * 4 * 16); 1511 for (unsigned i = 0; i < rctx->framebuffer.nr_samples; i++) { 1512 ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]); 1513 /* Also fill in center-zeroed positions used for interpolateAtSample */ 1514 rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f; 1515 rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f; 1516 } 1517 1518 rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true; 1519} 1520 1521static void update_shader_atom(struct pipe_context *ctx, 1522 struct r600_shader_state *state, 1523 struct r600_pipe_shader *shader) 1524{ 1525 struct r600_context *rctx = (struct r600_context *)ctx; 1526 1527 state->shader = shader; 1528 if (shader) { 1529 state->atom.num_dw = shader->command_buffer.num_dw; 1530 r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo); 1531 } else { 1532 state->atom.num_dw = 0; 1533 } 1534 r600_mark_atom_dirty(rctx, &state->atom); 1535} 1536 1537static void update_gs_block_state(struct r600_context *rctx, unsigned enable) 1538{ 1539 if (rctx->shader_stages.geom_enable != enable) { 1540 rctx->shader_stages.geom_enable = enable; 1541 r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom); 1542 } 1543 1544 if (rctx->gs_rings.enable != enable) { 1545 rctx->gs_rings.enable = enable; 1546 r600_mark_atom_dirty(rctx, &rctx->gs_rings.atom); 1547 1548 if (enable && !rctx->gs_rings.esgs_ring.buffer) { 1549 unsigned size = 0x1C000; 1550 rctx->gs_rings.esgs_ring.buffer = 1551 pipe_buffer_create(rctx->b.b.screen, 0, 1552 PIPE_USAGE_DEFAULT, size); 1553 rctx->gs_rings.esgs_ring.buffer_size = size; 1554 1555 size = 0x4000000; 1556 1557 rctx->gs_rings.gsvs_ring.buffer = 1558 pipe_buffer_create(rctx->b.b.screen, 0, 1559 PIPE_USAGE_DEFAULT, size); 1560 rctx->gs_rings.gsvs_ring.buffer_size = size; 1561 } 1562 1563 if (enable) { 1564 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, 1565 R600_GS_RING_CONST_BUFFER, false, 
&rctx->gs_rings.esgs_ring); 1566 if (rctx->tes_shader) { 1567 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, 1568 R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring); 1569 } else { 1570 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, 1571 R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring); 1572 } 1573 } else { 1574 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, 1575 R600_GS_RING_CONST_BUFFER, false, NULL); 1576 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, 1577 R600_GS_RING_CONST_BUFFER, false, NULL); 1578 r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, 1579 R600_GS_RING_CONST_BUFFER, false, NULL); 1580 } 1581 } 1582} 1583 1584static void r600_update_clip_state(struct r600_context *rctx, 1585 struct r600_pipe_shader *current) 1586{ 1587 if (current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl || 1588 current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write || 1589 current->shader.cull_dist_write != rctx->clip_misc_state.cull_dist_write || 1590 current->shader.vs_position_window_space != rctx->clip_misc_state.clip_disable || 1591 current->shader.vs_out_viewport != rctx->clip_misc_state.vs_out_viewport) { 1592 rctx->clip_misc_state.pa_cl_vs_out_cntl = current->pa_cl_vs_out_cntl; 1593 rctx->clip_misc_state.clip_dist_write = current->shader.clip_dist_write; 1594 rctx->clip_misc_state.cull_dist_write = current->shader.cull_dist_write; 1595 rctx->clip_misc_state.clip_disable = current->shader.vs_position_window_space; 1596 rctx->clip_misc_state.vs_out_viewport = current->shader.vs_out_viewport; 1597 r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom); 1598 } 1599} 1600 1601static void r600_generate_fixed_func_tcs(struct r600_context *rctx) 1602{ 1603 struct ureg_src const0, const1; 1604 struct ureg_dst tessouter, tessinner; 1605 struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL); 1606 1607 if (!ureg) 1608 return; /* if we get here, we're screwed */ 
1609 1610 assert(!rctx->fixed_func_tcs_shader); 1611 1612 ureg_DECL_constant2D(ureg, 0, 1, R600_BUFFER_INFO_CONST_BUFFER); 1613 const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0), 1614 R600_BUFFER_INFO_CONST_BUFFER); 1615 const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 1), 1616 R600_BUFFER_INFO_CONST_BUFFER); 1617 1618 tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0); 1619 tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0); 1620 1621 ureg_MOV(ureg, tessouter, const0); 1622 ureg_MOV(ureg, tessinner, const1); 1623 ureg_END(ureg); 1624 1625 rctx->fixed_func_tcs_shader = 1626 ureg_create_shader_and_destroy(ureg, &rctx->b.b); 1627} 1628 1629void r600_update_compressed_resource_state(struct r600_context *rctx, bool compute_only) 1630{ 1631 unsigned i; 1632 unsigned counter; 1633 1634 counter = p_atomic_read(&rctx->screen->b.compressed_colortex_counter); 1635 if (counter != rctx->b.last_compressed_colortex_counter) { 1636 rctx->b.last_compressed_colortex_counter = counter; 1637 1638 if (compute_only) { 1639 r600_update_compressed_colortex_mask(&rctx->samplers[PIPE_SHADER_COMPUTE].views); 1640 } else { 1641 for (i = 0; i < PIPE_SHADER_TYPES; ++i) { 1642 r600_update_compressed_colortex_mask(&rctx->samplers[i].views); 1643 } 1644 } 1645 if (!compute_only) 1646 r600_update_compressed_colortex_mask_images(&rctx->fragment_images); 1647 r600_update_compressed_colortex_mask_images(&rctx->compute_images); 1648 } 1649 1650 /* Decompress textures if needed. 
*/ 1651 for (i = 0; i < PIPE_SHADER_TYPES; i++) { 1652 struct r600_samplerview_state *views = &rctx->samplers[i].views; 1653 1654 if (compute_only) 1655 if (i != PIPE_SHADER_COMPUTE) 1656 continue; 1657 if (views->compressed_depthtex_mask) { 1658 r600_decompress_depth_textures(rctx, views); 1659 } 1660 if (views->compressed_colortex_mask) { 1661 r600_decompress_color_textures(rctx, views); 1662 } 1663 } 1664 1665 { 1666 struct r600_image_state *istate; 1667 1668 if (!compute_only) { 1669 istate = &rctx->fragment_images; 1670 if (istate->compressed_depthtex_mask) 1671 r600_decompress_depth_images(rctx, istate); 1672 if (istate->compressed_colortex_mask) 1673 r600_decompress_color_images(rctx, istate); 1674 } 1675 1676 istate = &rctx->compute_images; 1677 if (istate->compressed_depthtex_mask) 1678 r600_decompress_depth_images(rctx, istate); 1679 if (istate->compressed_colortex_mask) 1680 r600_decompress_color_images(rctx, istate); 1681 } 1682} 1683 1684/* update MEM_SCRATCH buffers if needed */ 1685void r600_setup_scratch_area_for_shader(struct r600_context *rctx, 1686 struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch, 1687 unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg) 1688{ 1689 unsigned num_ses = rctx->screen->b.info.max_se; 1690 unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes; 1691 unsigned nthreads = 128; 1692 1693 unsigned itemsize = shader->scratch_space_needed * 4; 1694 unsigned size = align(itemsize * nthreads * num_pipes * num_ses * 4, 256); 1695 1696 if (scratch->dirty || 1697 unlikely(shader->scratch_space_needed != scratch->item_size || 1698 size > scratch->size)) { 1699 struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1700 1701 scratch->dirty = false; 1702 1703 if (size > scratch->size) { 1704 // Release prior one if any 1705 if (scratch->buffer) { 1706 pipe_resource_reference((struct pipe_resource**)&scratch->buffer, NULL); 1707 } 1708 1709 scratch->buffer = (struct r600_resource 
*)pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM, 1710 PIPE_USAGE_DEFAULT, size); 1711 if (scratch->buffer) { 1712 scratch->size = size; 1713 } 1714 } 1715 1716 scratch->item_size = shader->scratch_space_needed; 1717 1718 radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); 1719 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1720 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); 1721 1722 // multi-SE chips need programming per SE 1723 for (unsigned se = 0; se < num_ses; se++) { 1724 struct r600_resource *rbuffer = scratch->buffer; 1725 unsigned size_per_se = size / num_ses; 1726 1727 // Direct to particular SE 1728 if (num_ses > 1) { 1729 radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX, 1730 S_0802C_INSTANCE_INDEX(0) | 1731 S_0802C_SE_INDEX(se) | 1732 S_0802C_INSTANCE_BROADCAST_WRITES(1) | 1733 S_0802C_SE_BROADCAST_WRITES(0)); 1734 } 1735 1736 radeon_set_config_reg(cs, ring_base_reg, (rbuffer->gpu_address + size_per_se * se) >> 8); 1737 radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 1738 radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 1739 RADEON_USAGE_READWRITE, 1740 RADEON_PRIO_SCRATCH_BUFFER)); 1741 radeon_set_context_reg(cs, item_size_reg, itemsize); 1742 radeon_set_config_reg(cs, ring_size_reg, size_per_se >> 8); 1743 } 1744 1745 // Restore broadcast mode 1746 if (num_ses > 1) { 1747 radeon_set_config_reg(cs, EG_0802C_GRBM_GFX_INDEX, 1748 S_0802C_INSTANCE_INDEX(0) | 1749 S_0802C_SE_INDEX(0) | 1750 S_0802C_INSTANCE_BROADCAST_WRITES(1) | 1751 S_0802C_SE_BROADCAST_WRITES(1)); 1752 } 1753 1754 radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); 1755 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1756 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); 1757 } 1758} 1759 1760void r600_setup_scratch_buffers(struct r600_context *rctx) { 1761 static const struct { 1762 unsigned ring_base; 1763 unsigned item_size; 1764 unsigned ring_size; 1765 } regs[R600_NUM_HW_STAGES] = { 1766 [R600_HW_STAGE_PS] = 
{ R_008C68_SQ_PSTMP_RING_BASE, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE }, 1767 [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE }, 1768 [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE }, 1769 [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE } 1770 }; 1771 1772 for (unsigned i = 0; i < R600_NUM_HW_STAGES; i++) { 1773 struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader; 1774 1775 if (stage && unlikely(stage->scratch_space_needed)) { 1776 r600_setup_scratch_area_for_shader(rctx, stage, 1777 &rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size); 1778 } 1779 } 1780} 1781 1782#define SELECT_SHADER_OR_FAIL(x) do { \ 1783 r600_shader_select(ctx, rctx->x##_shader, &x##_dirty); \ 1784 if (unlikely(!rctx->x##_shader->current)) \ 1785 return false; \ 1786 } while(0) 1787 1788#define UPDATE_SHADER(hw, sw) do { \ 1789 if (sw##_dirty || (rctx->hw_shader_stages[(hw)].shader != rctx->sw##_shader->current)) \ 1790 update_shader_atom(ctx, &rctx->hw_shader_stages[(hw)], rctx->sw##_shader->current); \ 1791 } while(0) 1792 1793#define UPDATE_SHADER_CLIP(hw, sw) do { \ 1794 if (sw##_dirty || (rctx->hw_shader_stages[(hw)].shader != rctx->sw##_shader->current)) { \ 1795 update_shader_atom(ctx, &rctx->hw_shader_stages[(hw)], rctx->sw##_shader->current); \ 1796 clip_so_current = rctx->sw##_shader->current; \ 1797 } \ 1798 } while(0) 1799 1800#define UPDATE_SHADER_GS(hw, hw2, sw) do { \ 1801 if (sw##_dirty || (rctx->hw_shader_stages[(hw)].shader != rctx->sw##_shader->current)) { \ 1802 update_shader_atom(ctx, &rctx->hw_shader_stages[(hw)], rctx->sw##_shader->current); \ 1803 update_shader_atom(ctx, &rctx->hw_shader_stages[(hw2)], rctx->sw##_shader->current->gs_copy_shader); \ 1804 clip_so_current = 
					rctx->sw##_shader->current->gs_copy_shader; \
	}						\
	} while(0)

/* Unbind whatever shader is currently bound to hardware stage 'hw'.
 * Only emits an update when a shader is actually bound; relies on the
 * locals 'rctx' and 'ctx' existing at the expansion site. */
#define SET_NULL_SHADER(hw) do { \
	if (rctx->hw_shader_stages[(hw)].shader) \
		update_shader_atom(ctx, &rctx->hw_shader_stages[(hw)], NULL); \
	} while (0)

/* Resolve all per-draw derived shader state.
 *
 * Selects the current variant of every bound software stage (PS/VS/GS/
 * TCS/TES, via the SELECT_SHADER_OR_FAIL / UPDATE_SHADER* macros defined
 * above this block), maps software stages onto hardware stages depending
 * on whether geometry and/or tessellation shaders are active, refreshes
 * the shader-dependent constant buffers (texture-buffer masks / TXQ
 * info), sets up scratch buffers for spilling stages, and re-balances
 * GPR allocation on pre-Cayman chips.
 *
 * Returns false when the draw must be discarded (variant selection,
 * fixed-function TCS generation, or GPR adjustment failed); true
 * otherwise.
 */
static bool r600_update_derived_state(struct r600_context *rctx)
{
	struct pipe_context * ctx = (struct pipe_context*)rctx;
	bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
	bool tcs_dirty = false, tes_dirty = false, fixed_func_tcs_dirty = false;
	bool blend_disable;
	bool need_buf_const;
	/* NOTE(review): presumably set by the SELECT/UPDATE macro expansions
	 * below to the shader whose streamout/clip state is current — the
	 * macro definitions are above this view; confirm there. */
	struct r600_pipe_shader *clip_so_current = NULL;

	if (!rctx->blitter->running)
		r600_update_compressed_resource_state(rctx, false);

	SELECT_SHADER_OR_FAIL(ps);

	r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);

	update_gs_block_state(rctx, rctx->gs_shader != NULL);

	if (rctx->gs_shader)
		SELECT_SHADER_OR_FAIL(gs);

	/* Hull Shader */
	if (rctx->tcs_shader) {
		SELECT_SHADER_OR_FAIL(tcs);

		UPDATE_SHADER(EG_HW_STAGE_HS, tcs);
	} else if (rctx->tes_shader) {
		/* TES without TCS: generate (once) and use the fixed-function
		 * TCS so the hardware HS stage is still fed. */
		if (!rctx->fixed_func_tcs_shader) {
			r600_generate_fixed_func_tcs(rctx);
			if (!rctx->fixed_func_tcs_shader)
				return false;

		}
		SELECT_SHADER_OR_FAIL(fixed_func_tcs);

		UPDATE_SHADER(EG_HW_STAGE_HS, fixed_func_tcs);
	} else
		SET_NULL_SHADER(EG_HW_STAGE_HS);

	if (rctx->tes_shader) {
		SELECT_SHADER_OR_FAIL(tes);
	}

	SELECT_SHADER_OR_FAIL(vs);

	if (rctx->gs_shader) {
		if (!rctx->shader_stages.geom_enable) {
			rctx->shader_stages.geom_enable = true;
			r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
		}

		/* gs_shader provides GS and VS (copy shader) */
		UPDATE_SHADER_GS(R600_HW_STAGE_GS, R600_HW_STAGE_VS, gs);

		/* vs_shader is used as ES */

		if (rctx->tes_shader) {
			/* VS goes to LS, TES goes to ES */
			UPDATE_SHADER(R600_HW_STAGE_ES, tes);
			UPDATE_SHADER(EG_HW_STAGE_LS, vs);
		} else {
			/* vs_shader is used as ES */
			UPDATE_SHADER(R600_HW_STAGE_ES, vs);
			SET_NULL_SHADER(EG_HW_STAGE_LS);
		}
	} else {
		/* GS just got disabled: tear down the hw GS/ES bindings. */
		if (unlikely(rctx->hw_shader_stages[R600_HW_STAGE_GS].shader)) {
			SET_NULL_SHADER(R600_HW_STAGE_GS);
			SET_NULL_SHADER(R600_HW_STAGE_ES);
			rctx->shader_stages.geom_enable = false;
			r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
		}

		if (rctx->tes_shader) {
			/* if TES is loaded and no geometry, TES runs on hw VS, VS runs on hw LS */
			UPDATE_SHADER_CLIP(R600_HW_STAGE_VS, tes);
			UPDATE_SHADER(EG_HW_STAGE_LS, vs);
		} else {
			SET_NULL_SHADER(EG_HW_STAGE_LS);
			UPDATE_SHADER_CLIP(R600_HW_STAGE_VS, vs);
		}
	}

	/*
	 * XXX: I believe there's some fatal flaw in the dirty state logic when
	 * enabling/disabling tes.
	 * VS/ES share all buffer/resource/sampler slots. If TES is enabled,
	 * it will therefore overwrite the VS slots. If it now gets disabled,
	 * the VS needs to rebind all buffer/resource/sampler slots - not only
	 * has TES overwritten the corresponding slots, but when the VS was
	 * operating as LS the things with correpsonding dirty bits got bound
	 * to LS slots and won't reflect what is dirty as VS stage even if the
	 * TES didn't overwrite it. The story for re-enabled TES is similar.
	 * In any case, we're not allowed to submit any TES state when
	 * TES is disabled (the gallium frontend may not do this but this looks
	 * like an optimization to me, not something which can be relied on).
	 */

	/* Update clip misc state. */
	if (clip_so_current) {
		r600_update_clip_state(rctx, clip_so_current);
		rctx->b.streamout.enabled_stream_buffers_mask = clip_so_current->enabled_stream_buffers_mask;
	}

	/* PS-dependent color-buffer state: only revalidated when the PS
	 * variant or the rasterizer bits that feed PS compilation changed. */
	if (unlikely(ps_dirty || rctx->hw_shader_stages[R600_HW_STAGE_PS].shader != rctx->ps_shader->current ||
		rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable ||
		rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)) {

		if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs ||
		    rctx->cb_misc_state.ps_color_export_mask != rctx->ps_shader->current->ps_color_export_mask) {
			rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
			rctx->cb_misc_state.ps_color_export_mask = rctx->ps_shader->current->ps_color_export_mask;
			r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
		}

		if (rctx->b.chip_class <= R700) {
			bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;

			if (rctx->cb_misc_state.multiwrite != multiwrite) {
				rctx->cb_misc_state.multiwrite = multiwrite;
				r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
			}
		}

		/* Same PS variant but rasterizer bits changed: re-emit the PS
		 * hardware state without reselecting the variant. */
		if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
			((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) ||
			 (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)))) {

			if (rctx->b.chip_class >= EVERGREEN)
				evergreen_update_ps_state(ctx, rctx->ps_shader->current);
			else
				r600_update_ps_state(ctx, rctx->ps_shader->current);
		}

		r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
	}
	UPDATE_SHADER(R600_HW_STAGE_PS, ps);

	if (rctx->b.chip_class >= EVERGREEN) {
		evergreen_update_db_shader_control(rctx);
	} else {
		r600_update_db_shader_control(rctx);
	}

	/* For each shader stage that needs to spill, set up buffer for MEM_SCRATCH */
	if (rctx->b.chip_class >= EVERGREEN) {
		evergreen_setup_scratch_buffers(rctx);
	} else {
		r600_setup_scratch_buffers(rctx);
	}

	/* on R600 we stuff masks + txq info into one constant buffer */
	/* on evergreen we only need a txq info one */
	if (rctx->ps_shader) {
		need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers || rctx->ps_shader->current->shader.has_txq_cube_array_z_comp;
		if (need_buf_const) {
			if (rctx->b.chip_class < EVERGREEN)
				r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
			else
				eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
		}
	}

	if (rctx->vs_shader) {
		need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers || rctx->vs_shader->current->shader.has_txq_cube_array_z_comp;
		if (need_buf_const) {
			if (rctx->b.chip_class < EVERGREEN)
				r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
			else
				eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
		}
	}

	if (rctx->gs_shader) {
		need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers || rctx->gs_shader->current->shader.has_txq_cube_array_z_comp;
		if (need_buf_const) {
			if (rctx->b.chip_class < EVERGREEN)
				r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
			else
				eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
		}
	}

	/* Tessellation only exists on EVERGREEN+ in this driver. */
	if (rctx->tes_shader) {
		assert(rctx->b.chip_class >= EVERGREEN);
		need_buf_const = rctx->tes_shader->current->shader.uses_tex_buffers ||
				 rctx->tes_shader->current->shader.has_txq_cube_array_z_comp;
		if (need_buf_const) {
			eg_setup_buffer_constants(rctx, PIPE_SHADER_TESS_EVAL);
		}
		if (rctx->tcs_shader) {
			need_buf_const = rctx->tcs_shader->current->shader.uses_tex_buffers ||
					 rctx->tcs_shader->current->shader.has_txq_cube_array_z_comp;
			if (need_buf_const) {
				eg_setup_buffer_constants(rctx, PIPE_SHADER_TESS_CTRL);
			}
		}
	}

	r600_update_driver_const_buffers(rctx, false);

	if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
		if (!r600_adjust_gprs(rctx)) {
			/* discard rendering */
			return false;
		}
	}

	if (rctx->b.chip_class == EVERGREEN) {
		if (!evergreen_adjust_gprs(rctx)) {
			/* discard rendering */
			return false;
		}
	}

	/* Dual-source blending needs two PS color outputs; force blending
	 * off when the current PS doesn't provide them. */
	blend_disable = (rctx->dual_src_blend &&
			rctx->ps_shader->current->nr_ps_color_outputs < 2);

	if (blend_disable != rctx->force_blend_disable) {
		rctx->force_blend_disable = blend_disable;
		r600_bind_blend_state_internal(rctx,
					       rctx->blend_state.cso,
					       blend_disable);
	}

	return true;
}

/* Emit the clip-misc atom: PA_CL_CLIP_CNTL, PA_CL_VS_OUT_CNTL and, on
 * EVERGREEN+, VGT_REUSE_OFF (vertex reuse must be disabled when the VS
 * writes the viewport output). */
void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	struct r600_clip_misc_state *state = &rctx->clip_misc_state;

	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
			       state->pa_cl_clip_cntl |
			       (state->clip_dist_write ? 0 : state->clip_plane_enable & 0x3F) |
			       S_028810_CLIP_DISABLE(state->clip_disable));
	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
			       state->pa_cl_vs_out_cntl |
			       (state->clip_plane_enable & state->clip_dist_write) |
			       (state->cull_dist_write << 8));
	/* reuse needs to be set off if we write oViewport */
	if (rctx->b.chip_class >= EVERGREEN)
		radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
				       S_028AB4_REUSE_OFF(state->vs_out_viewport));
}

/* rast_prim is the primitive type after GS. */
static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	enum pipe_prim_type rast_prim = rctx->current_rast_prim;

	/* Skip this if not rendering lines.
 */
	if (rast_prim != PIPE_PRIM_LINES &&
	    rast_prim != PIPE_PRIM_LINE_LOOP &&
	    rast_prim != PIPE_PRIM_LINE_STRIP &&
	    rast_prim != PIPE_PRIM_LINES_ADJACENCY &&
	    rast_prim != PIPE_PRIM_LINE_STRIP_ADJACENCY)
		return;

	if (rast_prim == rctx->last_rast_prim)
		return;

	/* For lines, reset the stipple pattern at each primitive. Otherwise,
	 * reset the stipple pattern at each packet (line strips, line loops).
	 */
	radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
			       S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2) |
			       (rctx->rasterizer ? rctx->rasterizer->pa_sc_line_stipple : 0));
	rctx->last_rast_prim = rast_prim;
}

/* pipe_context::draw_vbo for r600.
 *
 * Validates derived state, translates/uploads index data as needed and
 * emits the draw packets for a single draw.  Multi-draw calls are split
 * into single draws up front via util_draw_multi().  Order of emission
 * matters throughout: state atoms first, then packet-level workarounds,
 * then the draw packet itself, then post-draw workarounds.
 */
static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
                          unsigned drawid_offset,
                          const struct pipe_draw_indirect_info *indirect,
                          const struct pipe_draw_start_count_bias *draws,
                          unsigned num_draws)
{
	if (num_draws > 1) {
		util_draw_multi(ctx, info, drawid_offset, indirect, draws, num_draws);
		return;
	}

	struct r600_context *rctx = (struct r600_context *)ctx;
	struct pipe_resource *indexbuf = !info->index_size || info->has_user_indices ? NULL : info->index.resource;
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
	bool has_user_indices = info->index_size && info->has_user_indices;
	uint64_t mask;
	unsigned num_patches, dirty_tex_counter, index_offset = 0;
	unsigned index_size = info->index_size;
	int index_bias;
	struct r600_shader_atomic combined_atomics[8];
	uint8_t atomic_used_mask = 0;
	struct pipe_stream_output_target *count_from_so = NULL;

	/* Stream-output-sourced draws are handled as non-indirect draws with
	 * the vertex count read back from the SO filled-size buffer. */
	if (indirect && indirect->count_from_stream_output) {
		count_from_so = indirect->count_from_stream_output;
		indirect = NULL;
	}

	/* Nothing to draw. */
	if (!indirect && !draws[0].count && (index_size || !count_from_so)) {
		return;
	}

	if (unlikely(!rctx->vs_shader)) {
		assert(0);
		return;
	}
	/* A missing PS is only legal with rasterizer discard. */
	if (unlikely(!rctx->ps_shader &&
		     (!rctx->rasterizer || !rctx->rasterizer->rasterizer_discard))) {
		assert(0);
		return;
	}

	/* make sure that the gfx ring is only one active */
	if (radeon_emitted(&rctx->b.dma.cs, 0)) {
		rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
	}

	if (rctx->cmd_buf_is_compute) {
		rctx->b.gfx.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
		rctx->cmd_buf_is_compute = false;
	}

	/* Re-emit the framebuffer state if needed. */
	dirty_tex_counter = p_atomic_read(&rctx->b.screen->dirty_tex_counter);
	if (unlikely(dirty_tex_counter != rctx->b.last_dirty_tex_counter)) {
		rctx->b.last_dirty_tex_counter = dirty_tex_counter;
		r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
		rctx->framebuffer.do_update_surf_dirtiness = true;
	}

	if (rctx->gs_shader) {
		/* Determine whether the GS triangle strip adjacency fix should
		 * be applied. Rotate every other triangle if
		 * - triangle strips with adjacency are fed to the GS and
		 * - primitive restart is disabled (the rotation doesn't help
		 *   when the restart occurs after an odd number of triangles).
		 */
		bool gs_tri_strip_adj_fix =
			!rctx->tes_shader &&
			info->mode == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY &&
			!info->primitive_restart;
		if (gs_tri_strip_adj_fix != rctx->gs_tri_strip_adj_fix)
			rctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
	}
	if (!r600_update_derived_state(rctx)) {
		/* useless to render because current rendering command
		 * can't be achieved
		 */
		return;
	}

	/* Primitive type as seen by the rasterizer (after GS/TES). */
	rctx->current_rast_prim = (rctx->gs_shader)? rctx->gs_shader->gs_output_prim
		: (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
		: info->mode;

	if (rctx->b.chip_class >= EVERGREEN) {
		evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
	}

	if (index_size) {
		index_offset += draws[0].start * index_size;

		/* Translate 8-bit indices to 16-bit. */
		if (unlikely(index_size == 1)) {
			struct pipe_resource *out_buffer = NULL;
			unsigned out_offset;
			void *ptr;
			unsigned start, count;

			if (likely(!indirect)) {
				start = 0;
				count = draws[0].count;
			}
			else {
				/* Have to get start/count from indirect buffer, slow path ahead... */
				struct r600_resource *indirect_resource = (struct r600_resource *)indirect->buffer;
				/* NOTE(review): reads the indirect args as
				 * data[0]=count, data[2]=start — confirm against
				 * the EG draw-indirect packet layout. */
				unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource,
									PIPE_MAP_READ);
				if (data) {
					data += indirect->offset / sizeof(unsigned);
					start = data[2] * index_size;
					count = data[0];
				}
				else {
					start = 0;
					count = 0;
				}
			}

			u_upload_alloc(ctx->stream_uploader, start, count * 2,
				       256, &out_offset, &out_buffer, &ptr);
			if (unlikely(!ptr))
				return;

			util_shorten_ubyte_elts_to_userptr(
				&rctx->b.b, info, 0, 0, index_offset, count, ptr);

			indexbuf = out_buffer;
			index_offset = out_offset;
			index_size = 2;
			has_user_indices = false;
		}

		/* Upload the index buffer.
		 * The upload is skipped for small index counts on little-endian machines
		 * and the indices are emitted via PKT3_DRAW_INDEX_IMMD.
		 * Indirect draws never use immediate indices.
		 * Note: Instanced rendering in combination with immediate indices hangs. */
		if (has_user_indices && (R600_BIG_ENDIAN || indirect ||
					 info->instance_count > 1 ||
					 draws[0].count*index_size > 20)) {
			unsigned start_offset = draws[0].start * index_size;
			indexbuf = NULL;
			u_upload_data(ctx->stream_uploader, start_offset,
				      draws[0].count * index_size, 256,
				      (char*)info->index.user + start_offset,
				      &index_offset, &indexbuf);
			index_offset -= start_offset;
			has_user_indices = false;
		}
		index_bias = draws->index_bias;
	} else {
		index_bias = indirect ? 0 : draws[0].start;
	}

	/* Set the index offset and primitive restart. */
	bool restart_index_changed = info->primitive_restart &&
		rctx->vgt_state.vgt_multi_prim_ib_reset_indx != info->restart_index;

	if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != info->primitive_restart ||
	    restart_index_changed ||
	    rctx->vgt_state.vgt_indx_offset != index_bias ||
	    (rctx->vgt_state.last_draw_was_indirect && !indirect)) {
		rctx->vgt_state.vgt_multi_prim_ib_reset_en = info->primitive_restart;
		rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info->restart_index;
		rctx->vgt_state.vgt_indx_offset = index_bias;
		r600_mark_atom_dirty(rctx, &rctx->vgt_state.atom);
	}

	/* Workaround for hardware deadlock on certain R600 ASICs: write into a CB register. */
	if (rctx->b.chip_class == R600) {
		rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
	}

	if (rctx->b.chip_class >= EVERGREEN)
		evergreen_setup_tess_constants(rctx, info, &num_patches);

	/* Emit states. */
	r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask));
	r600_flush_emit(rctx);

	mask = rctx->dirty_atoms;
	while (mask != 0) {
		r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
	}

	if (rctx->b.chip_class >= EVERGREEN) {
		evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask);
	}

	if (rctx->b.chip_class == CAYMAN) {
		/* Copied from radeonsi. */
		unsigned primgroup_size = 128; /* recommended without a GS */
		bool ia_switch_on_eop = false;
		bool partial_vs_wave = false;

		if (rctx->gs_shader)
			primgroup_size = 64; /* recommended with a GS */

		if ((rctx->rasterizer && rctx->rasterizer->pa_sc_line_stipple) ||
		    (rctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) {
			ia_switch_on_eop = true;
		}

		if (r600_get_strmout_en(&rctx->b))
			partial_vs_wave = true;

		radeon_set_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
				       S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
				       S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
				       S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
	}

	if (rctx->b.chip_class >= EVERGREEN) {
		uint32_t ls_hs_config = evergreen_get_ls_hs_config(rctx, info,
								   num_patches);

		evergreen_set_ls_hs_config(rctx, cs, ls_hs_config);
		evergreen_set_lds_alloc(rctx, cs, rctx->lds_alloc);
	}

	/* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
	 * even though it should have no effect on those. */
	if (rctx->b.chip_class == R600 && rctx->rasterizer) {
		unsigned su_sc_mode_cntl = rctx->rasterizer->pa_su_sc_mode_cntl;
		unsigned prim = info->mode;

		if (rctx->gs_shader) {
			prim = rctx->gs_shader->gs_output_prim;
		}
		prim = r600_conv_prim_to_gs_out(prim); /* decrease the number of types to 3 */

		if (prim == V_028A6C_OUTPRIM_TYPE_POINTLIST ||
		    prim == V_028A6C_OUTPRIM_TYPE_LINESTRIP ||
		    info->mode == R600_PRIM_RECTANGLE_LIST) {
			su_sc_mode_cntl &= C_028814_CULL_FRONT;
		}
		radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
	}

	/* Update start instance. */
	if (!indirect && rctx->last_start_instance != info->start_instance) {
		radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info->start_instance);
		rctx->last_start_instance = info->start_instance;
	}

	/* Update the primitive type. */
	if (rctx->last_primitive_type != info->mode) {
		r600_emit_rasterizer_prim_state(rctx);
		radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
				      r600_conv_pipe_prim(info->mode));

		rctx->last_primitive_type = info->mode;
	}

	/* Draw packets. */
	if (likely(!indirect)) {
		radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
		radeon_emit(cs, info->instance_count);
	} else {
		uint64_t va = r600_resource(indirect->buffer)->gpu_address;
		assert(rctx->b.chip_class >= EVERGREEN);

		// Invalidate so non-indirect draw calls reset this state
		rctx->vgt_state.last_draw_was_indirect = true;
		rctx->last_start_instance = -1;

		radeon_emit(cs, PKT3(EG_PKT3_SET_BASE, 2, 0));
		radeon_emit(cs, EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE);
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
							  (struct r600_resource*)indirect->buffer,
							  RADEON_USAGE_READ,
							  RADEON_PRIO_DRAW_INDIRECT));
	}

	if (index_size) {
		radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
		radeon_emit(cs, index_size == 4 ?
			    (VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) :
			    (VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0)));

		if (has_user_indices) {
			/* Small user index buffers go inline in the packet. */
			unsigned size_bytes = draws[0].count*index_size;
			unsigned size_dw = align(size_bytes, 4) / 4;
			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit));
			radeon_emit(cs, draws[0].count);
			radeon_emit(cs, V_0287F0_DI_SRC_SEL_IMMEDIATE);
			radeon_emit_array(cs, info->index.user + draws[0].start * index_size, size_dw);
		} else {
			uint64_t va = r600_resource(indexbuf)->gpu_address + index_offset;

			if (likely(!indirect)) {
				radeon_emit(cs, PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit));
				radeon_emit(cs, va);
				radeon_emit(cs, (va >> 32UL) & 0xFF);
				radeon_emit(cs, draws[0].count);
				radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
				radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
				radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
									  (struct r600_resource*)indexbuf,
									  RADEON_USAGE_READ,
									  RADEON_PRIO_INDEX_BUFFER));
			}
			else {
				uint32_t max_size = (indexbuf->width0 - index_offset) / index_size;

				radeon_emit(cs, PKT3(EG_PKT3_INDEX_BASE, 1, 0));
				radeon_emit(cs, va);
				radeon_emit(cs, (va >> 32UL) & 0xFF);

				radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
				radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
									  (struct r600_resource*)indexbuf,
									  RADEON_USAGE_READ,
									  RADEON_PRIO_INDEX_BUFFER));

				radeon_emit(cs, PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0));
				radeon_emit(cs, max_size);

				radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit));
				radeon_emit(cs, indirect->offset);
				radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
			}
		}
	} else {
		if (unlikely(count_from_so)) {
			/* Vertex count comes from a streamout filled-size
			 * buffer: copy it into the VGT register via COPY_DW. */
			struct r600_so_target *t = (struct r600_so_target*)count_from_so;
			uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;

			radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);

			radeon_emit(cs, PKT3(PKT3_COPY_DW, 4, 0));
			radeon_emit(cs, COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG);
			radeon_emit(cs, va & 0xFFFFFFFFUL);     /* src address lo */
			radeon_emit(cs, (va >> 32UL) & 0xFFUL); /* src address hi */
			radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); /* dst register */
			radeon_emit(cs, 0); /* unused */

			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
			radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
								  t->buf_filled_size, RADEON_USAGE_READ,
								  RADEON_PRIO_SO_FILLED_SIZE));
		}

		if (likely(!indirect)) {
			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
			radeon_emit(cs, draws[0].count);
		}
		else {
			radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit));
			radeon_emit(cs, indirect->offset);
		}
		radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
				(count_from_so ? S_0287F0_USE_OPAQUE(1) : 0));
	}

	/* SMX returns CONTEXT_DONE too early workaround */
	if (rctx->b.family == CHIP_R600 ||
	    rctx->b.family == CHIP_RV610 ||
	    rctx->b.family == CHIP_RV630 ||
	    rctx->b.family == CHIP_RV635) {
		/* if we have gs shader or streamout
		   we need to do a wait idle after every draw */
		if (rctx->gs_shader || r600_get_strmout_en(&rctx->b)) {
			radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
		}
	}

	/* ES ring rolling over at EOP - workaround */
	if (rctx->b.chip_class == R600) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT));
	}


	if (rctx->b.chip_class >= EVERGREEN)
		evergreen_emit_atomic_buffer_save(rctx, false, combined_atomics, &atomic_used_mask);

	if (rctx->trace_buf)
		eg_trace_emit(rctx);

	if (rctx->framebuffer.do_update_surf_dirtiness) {
		/* Set the depth buffer as dirty.
 */
		if (rctx->framebuffer.state.zsbuf) {
			struct pipe_surface *surf = rctx->framebuffer.state.zsbuf;
			struct r600_texture *rtex = (struct r600_texture *)surf->texture;

			rtex->dirty_level_mask |= 1 << surf->u.tex.level;

			if (rtex->surface.has_stencil)
				rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
		}
		/* Mark every bound compressed color buffer level dirty. */
		if (rctx->framebuffer.compressed_cb_mask) {
			struct pipe_surface *surf;
			struct r600_texture *rtex;
			unsigned mask = rctx->framebuffer.compressed_cb_mask;

			do {
				unsigned i = u_bit_scan(&mask);
				surf = rctx->framebuffer.state.cbufs[i];
				rtex = (struct r600_texture*)surf->texture;

				rtex->dirty_level_mask |= 1 << surf->u.tex.level;

			} while (mask);
		}
		rctx->framebuffer.do_update_surf_dirtiness = false;
	}

	/* Drop the reference on any index buffer this function created
	 * (8->16-bit translation or user-index upload). */
	if (index_size && indexbuf != info->index.resource)
		pipe_resource_reference(&indexbuf, NULL);
	rctx->b.num_draw_calls++;
}

/* Translate a pipe stencil op into the hardware V_028800_STENCIL_* value. */
uint32_t r600_translate_stencil_op(int s_op)
{
	switch (s_op) {
	case PIPE_STENCIL_OP_KEEP:
		return V_028800_STENCIL_KEEP;
	case PIPE_STENCIL_OP_ZERO:
		return V_028800_STENCIL_ZERO;
	case PIPE_STENCIL_OP_REPLACE:
		return V_028800_STENCIL_REPLACE;
	case PIPE_STENCIL_OP_INCR:
		return V_028800_STENCIL_INCR;
	case PIPE_STENCIL_OP_DECR:
		return V_028800_STENCIL_DECR;
	case PIPE_STENCIL_OP_INCR_WRAP:
		return V_028800_STENCIL_INCR_WRAP;
	case PIPE_STENCIL_OP_DECR_WRAP:
		return V_028800_STENCIL_DECR_WRAP;
	case PIPE_STENCIL_OP_INVERT:
		return V_028800_STENCIL_INVERT;
	default:
		R600_ERR("Unknown stencil op %d", s_op);
		assert(0);
		break;
	}
	return 0;
}

/* Translate a pipe polygon mode into the hardware encoding
 * (FILL=2, LINE=1, POINT=0). */
uint32_t r600_translate_fill(uint32_t func)
{
	switch(func) {
	case PIPE_POLYGON_MODE_FILL:
		return 2;
	case PIPE_POLYGON_MODE_LINE:
		return 1;
	case PIPE_POLYGON_MODE_POINT:
		return 0;
	default:
		assert(0);
		return 0;
	}
}

/* Translate a pipe texture wrap mode into the SQ_TEX clamp/wrap enum.
 * Unknown values fall back to repeat (default label shares the
 * PIPE_TEX_WRAP_REPEAT case). */
unsigned r600_tex_wrap(unsigned wrap)
{
	switch (wrap) {
	default:
	case PIPE_TEX_WRAP_REPEAT:
		return V_03C000_SQ_TEX_WRAP;
	case PIPE_TEX_WRAP_CLAMP:
		return V_03C000_SQ_TEX_CLAMP_HALF_BORDER;
	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL;
	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		return V_03C000_SQ_TEX_CLAMP_BORDER;
	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		return V_03C000_SQ_TEX_MIRROR;
	case PIPE_TEX_WRAP_MIRROR_CLAMP:
		return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
		return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
		return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER;
	}
}

/* Translate a pipe mip filter into the SQ_TEX Z-filter enum
 * (unknown values behave as "no mip filtering"). */
unsigned r600_tex_mipfilter(unsigned filter)
{
	switch (filter) {
	case PIPE_TEX_MIPFILTER_NEAREST:
		return V_03C000_SQ_TEX_Z_FILTER_POINT;
	case PIPE_TEX_MIPFILTER_LINEAR:
		return V_03C000_SQ_TEX_Z_FILTER_LINEAR;
	default:
	case PIPE_TEX_MIPFILTER_NONE:
		return V_03C000_SQ_TEX_Z_FILTER_NONE;
	}
}

/* Translate a pipe compare func into the SQ_TEX depth-compare enum
 * (unknown values behave as NEVER). */
unsigned r600_tex_compare(unsigned compare)
{
	switch (compare) {
	default:
	case PIPE_FUNC_NEVER:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER;
	case PIPE_FUNC_LESS:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS;
	case PIPE_FUNC_EQUAL:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case PIPE_FUNC_LEQUAL:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case PIPE_FUNC_GREATER:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER;
	case PIPE_FUNC_NOTEQUAL:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case PIPE_FUNC_GEQUAL:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case PIPE_FUNC_ALWAYS:
		return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	}
}

/* Whether the given wrap mode can sample the border color.  The
 * plain/mirror CLAMP modes only touch the border with linear filtering
 * (half-border modes). */
static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
{
	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
	       (linear_filter &&
		(wrap == PIPE_TEX_WRAP_CLAMP ||
		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
}

/* True when the sampler state has a non-zero border color AND at least
 * one of its wrap modes can actually sample it. */
bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
{
	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;

	return (state->border_color.ui[0] || state->border_color.ui[1] ||
		state->border_color.ui[2] || state->border_color.ui[3]) &&
	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
}

/* Emit a shader-state atom: replay the shader's prebuilt command buffer
 * and add its BO to the CS buffer list (no-op when no shader is bound). */
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
{

	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader;

	if (!shader)
		return;

	r600_emit_command_buffer(cs, &shader->command_buffer);
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->bo,
					      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY));
}

/* Compose the format swizzle with the view swizzle and pack it into the
 * hardware word: texture resources use the shifts 16/19/22/25, vertex
 * fetch (vtx == TRUE) uses 3/6/9/12. */
unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
				   const unsigned char *swizzle_view,
				   boolean vtx)
{
	unsigned i;
	unsigned char swizzle[4];
	unsigned result = 0;
	const uint32_t tex_swizzle_shift[4] = {
		16, 19, 22, 25,
	};
	const uint32_t vtx_swizzle_shift[4] = {
		3, 6, 9, 12,
	};
	const uint32_t swizzle_bit[4] = {
		0, 1, 2, 3,
	};
	const uint32_t *swizzle_shift = tex_swizzle_shift;

	if (vtx)
		swizzle_shift = vtx_swizzle_shift;

	if (swizzle_view) {
		util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
	} else {
		memcpy(swizzle, swizzle_format, 4);
	}

	/* Get
swizzle. */ 2671 for (i = 0; i < 4; i++) { 2672 switch (swizzle[i]) { 2673 case PIPE_SWIZZLE_Y: 2674 result |= swizzle_bit[1] << swizzle_shift[i]; 2675 break; 2676 case PIPE_SWIZZLE_Z: 2677 result |= swizzle_bit[2] << swizzle_shift[i]; 2678 break; 2679 case PIPE_SWIZZLE_W: 2680 result |= swizzle_bit[3] << swizzle_shift[i]; 2681 break; 2682 case PIPE_SWIZZLE_0: 2683 result |= V_038010_SQ_SEL_0 << swizzle_shift[i]; 2684 break; 2685 case PIPE_SWIZZLE_1: 2686 result |= V_038010_SQ_SEL_1 << swizzle_shift[i]; 2687 break; 2688 default: /* PIPE_SWIZZLE_X */ 2689 result |= swizzle_bit[0] << swizzle_shift[i]; 2690 } 2691 } 2692 return result; 2693} 2694 2695/* texture format translate */ 2696uint32_t r600_translate_texformat(struct pipe_screen *screen, 2697 enum pipe_format format, 2698 const unsigned char *swizzle_view, 2699 uint32_t *word4_p, uint32_t *yuv_format_p, 2700 bool do_endian_swap) 2701{ 2702 struct r600_screen *rscreen = (struct r600_screen *)screen; 2703 uint32_t result = 0, word4 = 0, yuv_format = 0; 2704 const struct util_format_description *desc; 2705 boolean uniform = TRUE; 2706 bool is_srgb_valid = FALSE; 2707 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2708 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2709 const unsigned char swizzle_xxxy[4] = {0, 0, 0, 1}; 2710 const unsigned char swizzle_zyx1[4] = {2, 1, 0, 5}; 2711 const unsigned char swizzle_zyxw[4] = {2, 1, 0, 3}; 2712 2713 int i; 2714 const uint32_t sign_bit[4] = { 2715 S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED), 2716 S_038010_FORMAT_COMP_Y(V_038010_SQ_FORMAT_COMP_SIGNED), 2717 S_038010_FORMAT_COMP_Z(V_038010_SQ_FORMAT_COMP_SIGNED), 2718 S_038010_FORMAT_COMP_W(V_038010_SQ_FORMAT_COMP_SIGNED) 2719 }; 2720 2721 /* Need to replace the specified texture formats in case of big-endian. 2722 * These formats are formats that have channels with number of bits 2723 * not divisible by 8. 
2724 * Mesa conversion functions don't swap bits for those formats, and because 2725 * we transmit this over a serial bus to the GPU (PCIe), the 2726 * bit-endianess is important!!! 2727 * In case we have an "opposite" format, just use that for the swizzling 2728 * information. If we don't have such an "opposite" format, we need 2729 * to use a fixed swizzle info instead (see below) 2730 */ 2731 if (format == PIPE_FORMAT_R4A4_UNORM && do_endian_swap) 2732 format = PIPE_FORMAT_A4R4_UNORM; 2733 2734 desc = util_format_description(format); 2735 if (!desc) 2736 goto out_unknown; 2737 2738 /* Depth and stencil swizzling is handled separately. */ 2739 if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) { 2740 /* Need to check for specific texture formats that don't have 2741 * an "opposite" format we can use. For those formats, we directly 2742 * specify the swizzling, which is the LE swizzling as defined in 2743 * u_format.csv 2744 */ 2745 if (do_endian_swap) { 2746 if (format == PIPE_FORMAT_L4A4_UNORM) 2747 word4 |= r600_get_swizzle_combined(swizzle_xxxy, swizzle_view, FALSE); 2748 else if (format == PIPE_FORMAT_B4G4R4A4_UNORM) 2749 word4 |= r600_get_swizzle_combined(swizzle_zyxw, swizzle_view, FALSE); 2750 else if (format == PIPE_FORMAT_B4G4R4X4_UNORM || format == PIPE_FORMAT_B5G6R5_UNORM) 2751 word4 |= r600_get_swizzle_combined(swizzle_zyx1, swizzle_view, FALSE); 2752 else 2753 word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE); 2754 } else { 2755 word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE); 2756 } 2757 } 2758 2759 /* Colorspace (return non-RGB formats directly). */ 2760 switch (desc->colorspace) { 2761 /* Depth stencil formats */ 2762 case UTIL_FORMAT_COLORSPACE_ZS: 2763 switch (format) { 2764 /* Depth sampler formats. 
*/ 2765 case PIPE_FORMAT_Z16_UNORM: 2766 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE); 2767 result = FMT_16; 2768 goto out_word4; 2769 case PIPE_FORMAT_Z24X8_UNORM: 2770 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 2771 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE); 2772 result = FMT_8_24; 2773 goto out_word4; 2774 case PIPE_FORMAT_X8Z24_UNORM: 2775 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2776 if (rscreen->b.chip_class < EVERGREEN) 2777 goto out_unknown; 2778 word4 |= r600_get_swizzle_combined(swizzle_yyyy, swizzle_view, FALSE); 2779 result = FMT_24_8; 2780 goto out_word4; 2781 case PIPE_FORMAT_Z32_FLOAT: 2782 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE); 2783 result = FMT_32_FLOAT; 2784 goto out_word4; 2785 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 2786 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE); 2787 result = FMT_X24_8_32_FLOAT; 2788 goto out_word4; 2789 /* Stencil sampler formats. */ 2790 case PIPE_FORMAT_S8_UINT: 2791 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); 2792 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE); 2793 result = FMT_8; 2794 goto out_word4; 2795 case PIPE_FORMAT_X24S8_UINT: 2796 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); 2797 word4 |= r600_get_swizzle_combined(swizzle_yyyy, swizzle_view, FALSE); 2798 result = FMT_8_24; 2799 goto out_word4; 2800 case PIPE_FORMAT_S8X24_UINT: 2801 if (rscreen->b.chip_class < EVERGREEN) 2802 goto out_unknown; 2803 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); 2804 word4 |= r600_get_swizzle_combined(swizzle_xxxx, swizzle_view, FALSE); 2805 result = FMT_24_8; 2806 goto out_word4; 2807 case PIPE_FORMAT_X32_S8X24_UINT: 2808 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); 2809 word4 |= r600_get_swizzle_combined(swizzle_yyyy, swizzle_view, FALSE); 2810 result = FMT_X24_8_32_FLOAT; 2811 goto out_word4; 2812 default: 2813 goto out_unknown; 2814 } 2815 
2816 case UTIL_FORMAT_COLORSPACE_YUV: 2817 yuv_format |= (1 << 30); 2818 switch (format) { 2819 case PIPE_FORMAT_UYVY: 2820 case PIPE_FORMAT_YUYV: 2821 default: 2822 break; 2823 } 2824 goto out_unknown; /* XXX */ 2825 2826 case UTIL_FORMAT_COLORSPACE_SRGB: 2827 word4 |= S_038010_FORCE_DEGAMMA(1); 2828 break; 2829 2830 default: 2831 break; 2832 } 2833 2834 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 2835 switch (format) { 2836 case PIPE_FORMAT_RGTC1_SNORM: 2837 case PIPE_FORMAT_LATC1_SNORM: 2838 word4 |= sign_bit[0]; 2839 FALLTHROUGH; 2840 case PIPE_FORMAT_RGTC1_UNORM: 2841 case PIPE_FORMAT_LATC1_UNORM: 2842 result = FMT_BC4; 2843 goto out_word4; 2844 case PIPE_FORMAT_RGTC2_SNORM: 2845 case PIPE_FORMAT_LATC2_SNORM: 2846 word4 |= sign_bit[0] | sign_bit[1]; 2847 FALLTHROUGH; 2848 case PIPE_FORMAT_RGTC2_UNORM: 2849 case PIPE_FORMAT_LATC2_UNORM: 2850 result = FMT_BC5; 2851 goto out_word4; 2852 default: 2853 goto out_unknown; 2854 } 2855 } 2856 2857 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 2858 switch (format) { 2859 case PIPE_FORMAT_DXT1_RGB: 2860 case PIPE_FORMAT_DXT1_RGBA: 2861 case PIPE_FORMAT_DXT1_SRGB: 2862 case PIPE_FORMAT_DXT1_SRGBA: 2863 result = FMT_BC1; 2864 is_srgb_valid = TRUE; 2865 goto out_word4; 2866 case PIPE_FORMAT_DXT3_RGBA: 2867 case PIPE_FORMAT_DXT3_SRGBA: 2868 result = FMT_BC2; 2869 is_srgb_valid = TRUE; 2870 goto out_word4; 2871 case PIPE_FORMAT_DXT5_RGBA: 2872 case PIPE_FORMAT_DXT5_SRGBA: 2873 result = FMT_BC3; 2874 is_srgb_valid = TRUE; 2875 goto out_word4; 2876 default: 2877 goto out_unknown; 2878 } 2879 } 2880 2881 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 2882 if (rscreen->b.chip_class < EVERGREEN) 2883 goto out_unknown; 2884 2885 switch (format) { 2886 case PIPE_FORMAT_BPTC_RGBA_UNORM: 2887 case PIPE_FORMAT_BPTC_SRGBA: 2888 result = FMT_BC7; 2889 is_srgb_valid = TRUE; 2890 goto out_word4; 2891 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2892 word4 |= sign_bit[0] | sign_bit[1] | sign_bit[2]; 2893 FALLTHROUGH; 2894 case 
PIPE_FORMAT_BPTC_RGB_UFLOAT: 2895 result = FMT_BC6; 2896 goto out_word4; 2897 default: 2898 goto out_unknown; 2899 } 2900 } 2901 2902 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2903 switch (format) { 2904 case PIPE_FORMAT_R8G8_B8G8_UNORM: 2905 case PIPE_FORMAT_G8R8_B8R8_UNORM: 2906 result = FMT_GB_GR; 2907 goto out_word4; 2908 case PIPE_FORMAT_G8R8_G8B8_UNORM: 2909 case PIPE_FORMAT_R8G8_R8B8_UNORM: 2910 result = FMT_BG_RG; 2911 goto out_word4; 2912 default: 2913 goto out_unknown; 2914 } 2915 } 2916 2917 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 2918 result = FMT_5_9_9_9_SHAREDEXP; 2919 goto out_word4; 2920 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 2921 result = FMT_10_11_11_FLOAT; 2922 goto out_word4; 2923 } 2924 2925 2926 for (i = 0; i < desc->nr_channels; i++) { 2927 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2928 word4 |= sign_bit[i]; 2929 } 2930 } 2931 2932 /* R8G8Bx_SNORM - XXX CxV8U8 */ 2933 2934 /* See whether the components are of the same size. */ 2935 for (i = 1; i < desc->nr_channels; i++) { 2936 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 2937 } 2938 2939 /* Non-uniform formats. 
*/ 2940 if (!uniform) { 2941 if (desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && 2942 desc->channel[0].pure_integer) 2943 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); 2944 switch(desc->nr_channels) { 2945 case 3: 2946 if (desc->channel[0].size == 5 && 2947 desc->channel[1].size == 6 && 2948 desc->channel[2].size == 5) { 2949 result = FMT_5_6_5; 2950 goto out_word4; 2951 } 2952 goto out_unknown; 2953 case 4: 2954 if (desc->channel[0].size == 5 && 2955 desc->channel[1].size == 5 && 2956 desc->channel[2].size == 5 && 2957 desc->channel[3].size == 1) { 2958 result = FMT_1_5_5_5; 2959 goto out_word4; 2960 } 2961 if (desc->channel[0].size == 10 && 2962 desc->channel[1].size == 10 && 2963 desc->channel[2].size == 10 && 2964 desc->channel[3].size == 2) { 2965 result = FMT_2_10_10_10; 2966 goto out_word4; 2967 } 2968 goto out_unknown; 2969 } 2970 goto out_unknown; 2971 } 2972 2973 /* Find the first non-VOID channel. */ 2974 for (i = 0; i < 4; i++) { 2975 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2976 break; 2977 } 2978 } 2979 2980 if (i == 4) 2981 goto out_unknown; 2982 2983 /* uniform formats */ 2984 switch (desc->channel[i].type) { 2985 case UTIL_FORMAT_TYPE_UNSIGNED: 2986 case UTIL_FORMAT_TYPE_SIGNED: 2987#if 0 2988 if (!desc->channel[i].normalized && 2989 desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { 2990 goto out_unknown; 2991 } 2992#endif 2993 if (desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && 2994 desc->channel[i].pure_integer) 2995 word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); 2996 2997 switch (desc->channel[i].size) { 2998 case 4: 2999 switch (desc->nr_channels) { 3000 case 2: 3001 result = FMT_4_4; 3002 goto out_word4; 3003 case 4: 3004 result = FMT_4_4_4_4; 3005 goto out_word4; 3006 } 3007 goto out_unknown; 3008 case 8: 3009 switch (desc->nr_channels) { 3010 case 1: 3011 result = FMT_8; 3012 is_srgb_valid = TRUE; 3013 goto out_word4; 3014 case 2: 3015 result = FMT_8_8; 3016 goto out_word4; 3017 case 4: 3018 
result = FMT_8_8_8_8; 3019 is_srgb_valid = TRUE; 3020 goto out_word4; 3021 } 3022 goto out_unknown; 3023 case 16: 3024 switch (desc->nr_channels) { 3025 case 1: 3026 result = FMT_16; 3027 goto out_word4; 3028 case 2: 3029 result = FMT_16_16; 3030 goto out_word4; 3031 case 4: 3032 result = FMT_16_16_16_16; 3033 goto out_word4; 3034 } 3035 goto out_unknown; 3036 case 32: 3037 switch (desc->nr_channels) { 3038 case 1: 3039 result = FMT_32; 3040 goto out_word4; 3041 case 2: 3042 result = FMT_32_32; 3043 goto out_word4; 3044 case 4: 3045 result = FMT_32_32_32_32; 3046 goto out_word4; 3047 } 3048 } 3049 goto out_unknown; 3050 3051 case UTIL_FORMAT_TYPE_FLOAT: 3052 switch (desc->channel[i].size) { 3053 case 16: 3054 switch (desc->nr_channels) { 3055 case 1: 3056 result = FMT_16_FLOAT; 3057 goto out_word4; 3058 case 2: 3059 result = FMT_16_16_FLOAT; 3060 goto out_word4; 3061 case 4: 3062 result = FMT_16_16_16_16_FLOAT; 3063 goto out_word4; 3064 } 3065 goto out_unknown; 3066 case 32: 3067 switch (desc->nr_channels) { 3068 case 1: 3069 result = FMT_32_FLOAT; 3070 goto out_word4; 3071 case 2: 3072 result = FMT_32_32_FLOAT; 3073 goto out_word4; 3074 case 4: 3075 result = FMT_32_32_32_32_FLOAT; 3076 goto out_word4; 3077 } 3078 } 3079 goto out_unknown; 3080 } 3081 3082out_word4: 3083 3084 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && !is_srgb_valid) 3085 return ~0; 3086 if (word4_p) 3087 *word4_p = word4; 3088 if (yuv_format_p) 3089 *yuv_format_p = yuv_format; 3090 return result; 3091out_unknown: 3092 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 3093 return ~0; 3094} 3095 3096uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format, 3097 bool do_endian_swap) 3098{ 3099 const struct util_format_description *desc = util_format_description(format); 3100 int channel = util_format_get_first_non_void_channel(format); 3101 bool is_float; 3102 if (!desc) 3103 return ~0U; 3104 3105#define HAS_SIZE(x,y,z,w) \ 
3106 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 3107 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 3108 3109 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 3110 return V_0280A0_COLOR_10_11_11_FLOAT; 3111 3112 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || 3113 channel == -1) 3114 return ~0U; 3115 3116 is_float = desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT; 3117 3118 switch (desc->nr_channels) { 3119 case 1: 3120 switch (desc->channel[0].size) { 3121 case 8: 3122 return V_0280A0_COLOR_8; 3123 case 16: 3124 if (is_float) 3125 return V_0280A0_COLOR_16_FLOAT; 3126 else 3127 return V_0280A0_COLOR_16; 3128 case 32: 3129 if (is_float) 3130 return V_0280A0_COLOR_32_FLOAT; 3131 else 3132 return V_0280A0_COLOR_32; 3133 } 3134 break; 3135 case 2: 3136 if (desc->channel[0].size == desc->channel[1].size) { 3137 switch (desc->channel[0].size) { 3138 case 4: 3139 if (chip <= R700) 3140 return V_0280A0_COLOR_4_4; 3141 else 3142 return ~0U; /* removed on Evergreen */ 3143 case 8: 3144 return V_0280A0_COLOR_8_8; 3145 case 16: 3146 if (is_float) 3147 return V_0280A0_COLOR_16_16_FLOAT; 3148 else 3149 return V_0280A0_COLOR_16_16; 3150 case 32: 3151 if (is_float) 3152 return V_0280A0_COLOR_32_32_FLOAT; 3153 else 3154 return V_0280A0_COLOR_32_32; 3155 } 3156 } else if (HAS_SIZE(8,24,0,0)) { 3157 return (do_endian_swap ? 
V_0280A0_COLOR_8_24 : V_0280A0_COLOR_24_8); 3158 } else if (HAS_SIZE(24,8,0,0)) { 3159 return V_0280A0_COLOR_8_24; 3160 } 3161 break; 3162 case 3: 3163 if (HAS_SIZE(5,6,5,0)) { 3164 return V_0280A0_COLOR_5_6_5; 3165 } else if (HAS_SIZE(32,8,24,0)) { 3166 return V_0280A0_COLOR_X24_8_32_FLOAT; 3167 } 3168 break; 3169 case 4: 3170 if (desc->channel[0].size == desc->channel[1].size && 3171 desc->channel[0].size == desc->channel[2].size && 3172 desc->channel[0].size == desc->channel[3].size) { 3173 switch (desc->channel[0].size) { 3174 case 4: 3175 return V_0280A0_COLOR_4_4_4_4; 3176 case 8: 3177 return V_0280A0_COLOR_8_8_8_8; 3178 case 16: 3179 if (is_float) 3180 return V_0280A0_COLOR_16_16_16_16_FLOAT; 3181 else 3182 return V_0280A0_COLOR_16_16_16_16; 3183 case 32: 3184 if (is_float) 3185 return V_0280A0_COLOR_32_32_32_32_FLOAT; 3186 else 3187 return V_0280A0_COLOR_32_32_32_32; 3188 } 3189 } else if (HAS_SIZE(5,5,5,1)) { 3190 return V_0280A0_COLOR_1_5_5_5; 3191 } else if (HAS_SIZE(10,10,10,2)) { 3192 return V_0280A0_COLOR_2_10_10_10; 3193 } 3194 break; 3195 } 3196 return ~0U; 3197} 3198 3199uint32_t r600_colorformat_endian_swap(uint32_t colorformat, bool do_endian_swap) 3200{ 3201 if (R600_BIG_ENDIAN) { 3202 switch(colorformat) { 3203 /* 8-bit buffers. */ 3204 case V_0280A0_COLOR_4_4: 3205 case V_0280A0_COLOR_8: 3206 return ENDIAN_NONE; 3207 3208 /* 16-bit buffers. */ 3209 case V_0280A0_COLOR_8_8: 3210 /* 3211 * No need to do endian swaps on array formats, 3212 * as mesa<-->pipe formats conversion take into account 3213 * the endianess 3214 */ 3215 return ENDIAN_NONE; 3216 3217 case V_0280A0_COLOR_5_6_5: 3218 case V_0280A0_COLOR_1_5_5_5: 3219 case V_0280A0_COLOR_4_4_4_4: 3220 case V_0280A0_COLOR_16: 3221 return (do_endian_swap ? ENDIAN_8IN16 : ENDIAN_NONE); 3222 3223 /* 32-bit buffers. 
*/ 3224 case V_0280A0_COLOR_8_8_8_8: 3225 /* 3226 * No need to do endian swaps on array formats, 3227 * as mesa<-->pipe formats conversion take into account 3228 * the endianess 3229 */ 3230 return ENDIAN_NONE; 3231 3232 case V_0280A0_COLOR_2_10_10_10: 3233 case V_0280A0_COLOR_8_24: 3234 case V_0280A0_COLOR_24_8: 3235 case V_0280A0_COLOR_32_FLOAT: 3236 return (do_endian_swap ? ENDIAN_8IN32 : ENDIAN_NONE); 3237 3238 case V_0280A0_COLOR_16_16_FLOAT: 3239 case V_0280A0_COLOR_16_16: 3240 return ENDIAN_8IN16; 3241 3242 /* 64-bit buffers. */ 3243 case V_0280A0_COLOR_16_16_16_16: 3244 case V_0280A0_COLOR_16_16_16_16_FLOAT: 3245 return ENDIAN_8IN16; 3246 3247 case V_0280A0_COLOR_32_32_FLOAT: 3248 case V_0280A0_COLOR_32_32: 3249 case V_0280A0_COLOR_X24_8_32_FLOAT: 3250 return ENDIAN_8IN32; 3251 3252 /* 128-bit buffers. */ 3253 case V_0280A0_COLOR_32_32_32_32_FLOAT: 3254 case V_0280A0_COLOR_32_32_32_32: 3255 return ENDIAN_8IN32; 3256 default: 3257 return ENDIAN_NONE; /* Unsupported. */ 3258 } 3259 } else { 3260 return ENDIAN_NONE; 3261 } 3262} 3263 3264static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf) 3265{ 3266 struct r600_context *rctx = (struct r600_context*)ctx; 3267 struct r600_resource *rbuffer = r600_resource(buf); 3268 unsigned i, shader, mask; 3269 struct r600_pipe_sampler_view *view; 3270 3271 /* Reallocate the buffer in the same pipe_resource. */ 3272 r600_alloc_resource(&rctx->screen->b, rbuffer); 3273 3274 /* We changed the buffer, now we need to bind it where the old one was bound. */ 3275 /* Vertex buffers. */ 3276 mask = rctx->vertex_buffer_state.enabled_mask; 3277 while (mask) { 3278 i = u_bit_scan(&mask); 3279 if (rctx->vertex_buffer_state.vb[i].buffer.resource == &rbuffer->b.b) { 3280 rctx->vertex_buffer_state.dirty_mask |= 1 << i; 3281 r600_vertex_buffers_dirty(rctx); 3282 } 3283 } 3284 /* Streamout buffers. 
/* Replace the backing storage of a buffer resource and re-dirty every
 * binding point that may still reference the old GPU allocation, so the
 * new virtual address is re-emitted on the next draw.  Walks vertex
 * buffers, streamout targets, constant buffers, texture-buffer-object
 * views and fragment-shader image/SSBO slots.
 */
static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct r600_resource *rbuffer = r600_resource(buf);
	unsigned i, shader, mask;
	struct r600_pipe_sampler_view *view;

	/* Reallocate the buffer in the same pipe_resource. */
	r600_alloc_resource(&rctx->screen->b, rbuffer);

	/* We changed the buffer, now we need to bind it where the old one was bound. */
	/* Vertex buffers. */
	mask = rctx->vertex_buffer_state.enabled_mask;
	while (mask) {
		i = u_bit_scan(&mask);
		if (rctx->vertex_buffer_state.vb[i].buffer.resource == &rbuffer->b.b) {
			rctx->vertex_buffer_state.dirty_mask |= 1 << i;
			r600_vertex_buffers_dirty(rctx);
		}
	}
	/* Streamout buffers. */
	for (i = 0; i < rctx->b.streamout.num_targets; i++) {
		if (rctx->b.streamout.targets[i] &&
		    rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
			/* Finish any in-flight streamout before the buffer
			 * is rebound with a new address. */
			if (rctx->b.streamout.begin_emitted) {
				r600_emit_streamout_end(&rctx->b);
			}
			rctx->b.streamout.append_bitmask = rctx->b.streamout.enabled_mask;
			r600_streamout_buffers_dirty(&rctx->b);
		}
	}

	/* Constant buffers. */
	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
		struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
		bool found = false;
		uint32_t mask = state->enabled_mask;

		while (mask) {
			unsigned i = u_bit_scan(&mask);
			if (state->cb[i].buffer == &rbuffer->b.b) {
				found = true;
				state->dirty_mask |= 1 << i;
			}
		}
		if (found) {
			r600_constant_buffers_dirty(rctx, state);
		}
	}

	/* Texture buffer objects - update the virtual addresses in descriptors. */
	LIST_FOR_EACH_ENTRY(view, &rctx->texture_buffers, list) {
		if (view->base.texture == &rbuffer->b.b) {
			uint64_t offset = view->base.u.buf.offset;
			uint64_t va = rbuffer->gpu_address + offset;

			/* Patch the low 32 bits and the high address bits of
			 * the texture resource descriptor in place. */
			view->tex_resource_words[0] = va;
			view->tex_resource_words[2] &= C_038008_BASE_ADDRESS_HI;
			view->tex_resource_words[2] |= S_038008_BASE_ADDRESS_HI(va >> 32);
		}
	}
	/* Texture buffer objects - make bindings dirty if needed. */
	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
		struct r600_samplerview_state *state = &rctx->samplers[shader].views;
		bool found = false;
		uint32_t mask = state->enabled_mask;

		while (mask) {
			unsigned i = u_bit_scan(&mask);
			if (state->views[i]->base.texture == &rbuffer->b.b) {
				found = true;
				state->dirty_mask |= 1 << i;
			}
		}
		if (found) {
			r600_sampler_views_dirty(rctx, state);
		}
	}

	/* SSBOs */
	struct r600_image_state *istate = &rctx->fragment_buffers;
	{
		uint32_t mask = istate->enabled_mask;
		bool found = false;
		while (mask) {
			unsigned i = u_bit_scan(&mask);
			if (istate->views[i].base.resource == &rbuffer->b.b) {
				found = true;
				istate->dirty_mask |= 1 << i;
			}
		}
		if (found) {
			r600_mark_atom_dirty(rctx, &istate->atom);
		}
	}

}
*/ 3326 for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) { 3327 struct r600_samplerview_state *state = &rctx->samplers[shader].views; 3328 bool found = false; 3329 uint32_t mask = state->enabled_mask; 3330 3331 while (mask) { 3332 unsigned i = u_bit_scan(&mask); 3333 if (state->views[i]->base.texture == &rbuffer->b.b) { 3334 found = true; 3335 state->dirty_mask |= 1 << i; 3336 } 3337 } 3338 if (found) { 3339 r600_sampler_views_dirty(rctx, state); 3340 } 3341 } 3342 3343 /* SSBOs */ 3344 struct r600_image_state *istate = &rctx->fragment_buffers; 3345 { 3346 uint32_t mask = istate->enabled_mask; 3347 bool found = false; 3348 while (mask) { 3349 unsigned i = u_bit_scan(&mask); 3350 if (istate->views[i].base.resource == &rbuffer->b.b) { 3351 found = true; 3352 istate->dirty_mask |= 1 << i; 3353 } 3354 } 3355 if (found) { 3356 r600_mark_atom_dirty(rctx, &istate->atom); 3357 } 3358 } 3359 3360} 3361 3362static void r600_set_active_query_state(struct pipe_context *ctx, bool enable) 3363{ 3364 struct r600_context *rctx = (struct r600_context*)ctx; 3365 3366 /* Pipeline stat & streamout queries. */ 3367 if (enable) { 3368 rctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS; 3369 rctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 3370 } else { 3371 rctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS; 3372 rctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS; 3373 } 3374 3375 /* Occlusion queries. 
/* pipe_context-level hook: ensure there is room for num_dw more dwords in
 * the gfx command stream before recording state.
 * NOTE(review): the trailing 0 is forwarded to r600_need_cs_space as-is —
 * presumably "no extra dwords"; confirm against its prototype. */
static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				   bool include_draw_vbo)
{
	r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0);
}

/* keep this at the end of this file, please */
/* Wire up all pipe_context entry points shared by every r600-class chip
 * (shader CSO create/bind/delete, fixed-function state setters, resource
 * hooks and the draw entry point).  Chip-specific hooks are installed
 * elsewhere. */
void r600_init_common_state_functions(struct r600_context *rctx)
{
	/* Shader CSO creation. */
	rctx->b.b.create_fs_state = r600_create_ps_state;
	rctx->b.b.create_vs_state = r600_create_vs_state;
	rctx->b.b.create_gs_state = r600_create_gs_state;
	rctx->b.b.create_tcs_state = r600_create_tcs_state;
	rctx->b.b.create_tes_state = r600_create_tes_state;
	rctx->b.b.create_vertex_elements_state = r600_create_vertex_fetch_shader;
	/* CSO binding. */
	rctx->b.b.bind_blend_state = r600_bind_blend_state;
	rctx->b.b.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
	rctx->b.b.bind_sampler_states = r600_bind_sampler_states;
	rctx->b.b.bind_fs_state = r600_bind_ps_state;
	rctx->b.b.bind_rasterizer_state = r600_bind_rs_state;
	rctx->b.b.bind_vertex_elements_state = r600_bind_vertex_elements;
	rctx->b.b.bind_vs_state = r600_bind_vs_state;
	rctx->b.b.bind_gs_state = r600_bind_gs_state;
	rctx->b.b.bind_tcs_state = r600_bind_tcs_state;
	rctx->b.b.bind_tes_state = r600_bind_tes_state;
	/* CSO destruction. */
	rctx->b.b.delete_blend_state = r600_delete_blend_state;
	rctx->b.b.delete_depth_stencil_alpha_state = r600_delete_dsa_state;
	rctx->b.b.delete_fs_state = r600_delete_ps_state;
	rctx->b.b.delete_rasterizer_state = r600_delete_rs_state;
	rctx->b.b.delete_sampler_state = r600_delete_sampler_state;
	rctx->b.b.delete_vertex_elements_state = r600_delete_vertex_elements;
	rctx->b.b.delete_vs_state = r600_delete_vs_state;
	rctx->b.b.delete_gs_state = r600_delete_gs_state;
	rctx->b.b.delete_tcs_state = r600_delete_tcs_state;
	rctx->b.b.delete_tes_state = r600_delete_tes_state;
	/* Parameter-style state setters. */
	rctx->b.b.set_blend_color = r600_set_blend_color;
	rctx->b.b.set_clip_state = r600_set_clip_state;
	rctx->b.b.set_constant_buffer = r600_set_constant_buffer;
	rctx->b.b.set_sample_mask = r600_set_sample_mask;
	rctx->b.b.set_stencil_ref = r600_set_pipe_stencil_ref;
	rctx->b.b.set_vertex_buffers = r600_set_vertex_buffers;
	rctx->b.b.set_sampler_views = r600_set_sampler_views;
	rctx->b.b.sampler_view_destroy = r600_sampler_view_destroy;
	rctx->b.b.memory_barrier = r600_memory_barrier;
	rctx->b.b.texture_barrier = r600_texture_barrier;
	rctx->b.b.set_stream_output_targets = r600_set_streamout_targets;
	rctx->b.b.set_active_query_state = r600_set_active_query_state;

	/* Draw entry point and common-context callbacks. */
	rctx->b.b.draw_vbo = r600_draw_vbo;
	rctx->b.invalidate_buffer = r600_invalidate_buffer;
	rctx->b.need_gfx_cs_space = r600_need_gfx_cs_space;
}