1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jerome Glisse 25 */ 26#ifndef R600_PIPE_H 27#define R600_PIPE_H 28 29#include "r600_pipe_common.h" 30#include "r600_cs.h" 31#include "r600_public.h" 32#include "pipe/p_defines.h" 33 34#include "util/u_suballoc.h" 35#include "util/list.h" 36#include "util/u_transfer.h" 37#include "util/u_memory.h" 38 39#include "tgsi/tgsi_scan.h" 40 41#define R600_NUM_ATOMS 56 42 43#define R600_MAX_IMAGES 8 44/* 45 * ranges reserved for images on evergreen 46 * first set for the immediate buffers, 47 * second for the actual resources for RESQ. 
48 */ 49#define R600_IMAGE_IMMED_RESOURCE_OFFSET 160 50#define R600_IMAGE_REAL_RESOURCE_OFFSET 168 51 52/* read caches */ 53#define R600_CONTEXT_INV_VERTEX_CACHE (R600_CONTEXT_PRIVATE_FLAG << 0) 54#define R600_CONTEXT_INV_TEX_CACHE (R600_CONTEXT_PRIVATE_FLAG << 1) 55#define R600_CONTEXT_INV_CONST_CACHE (R600_CONTEXT_PRIVATE_FLAG << 2) 56/* read-write caches */ 57#define R600_CONTEXT_FLUSH_AND_INV (R600_CONTEXT_PRIVATE_FLAG << 3) 58#define R600_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4) 59#define R600_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5) 60#define R600_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6) 61#define R600_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7) 62/* engine synchronization */ 63#define R600_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8) 64#define R600_CONTEXT_WAIT_3D_IDLE (R600_CONTEXT_PRIVATE_FLAG << 9) 65#define R600_CONTEXT_WAIT_CP_DMA_IDLE (R600_CONTEXT_PRIVATE_FLAG << 10) 66#define R600_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11) 67 68/* the number of CS dwords for flushing and drawing */ 69#define R600_MAX_FLUSH_CS_DWORDS 18 70#define R600_MAX_DRAW_CS_DWORDS 58 71#define R600_MAX_PFP_SYNC_ME_DWORDS 16 72 73#define EG_MAX_ATOMIC_BUFFERS 8 74 75#define R600_MAX_USER_CONST_BUFFERS 15 76#define R600_MAX_DRIVER_CONST_BUFFERS 3 77#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) 78#define R600_MAX_HW_CONST_BUFFERS 16 79 80/* start driver buffers after user buffers */ 81#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) 82#define R600_UCP_SIZE (4*4*8) 83#define R600_CS_BLOCK_GRID_SIZE (8 * 4) 84#define R600_TCS_DEFAULT_LEVELS_SIZE (6 * 4) 85#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE) 86 87/* 88 * We only access this buffer through vtx clauses hence it's fine to exist 89 * at index beyond 15. 
90 */ 91#define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) 92/* 93 * Note GS doesn't use a constant buffer binding, just a resource index, 94 * so it's fine to have it exist at index beyond 15. I.e. it's not actually 95 * a const buffer, just a buffer resource. 96 */ 97#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) 98/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit 99 * of 16 const buffers. 100 * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id. 101 * 102 * In order to support d3d 11 mandated minimum of 15 user const buffers 103 * we'd have to squash all use cases into one driver buffer. 104 */ 105#define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4])) 106 107/* HW stages */ 108#define R600_HW_STAGE_PS 0 109#define R600_HW_STAGE_VS 1 110#define R600_HW_STAGE_GS 2 111#define R600_HW_STAGE_ES 3 112#define EG_HW_STAGE_LS 4 113#define EG_HW_STAGE_HS 5 114 115#define R600_NUM_HW_STAGES 4 116#define EG_NUM_HW_STAGES 6 117 118struct r600_context; 119struct r600_bytecode; 120union r600_shader_key; 121 122/* This is an atom containing GPU commands that never change. 123 * This is supposed to be copied directly into the CS. 
 */
/* A pre-built run of GPU command dwords; emitted by memcpy into the CS. */
struct r600_command_buffer {
	uint32_t *buf;       /* command dwords */
	unsigned num_dw;     /* dwords currently stored */
	unsigned max_num_dw; /* allocated capacity in dwords */
	unsigned pkt_flags;  /* OR'd into packet headers, e.g. RADEON_CP_PACKET3_COMPUTE_MODE */
};

/* Depth-buffer surface binding. */
struct r600_db_state {
	struct r600_atom atom;
	struct r600_surface *rsurf;
};

/* Derived depth/stencil state (flush/decompress bookkeeping). */
struct r600_db_misc_state {
	struct r600_atom atom;
	bool occlusion_queries_disabled;
	bool flush_depthstencil_through_cb;
	bool flush_depth_inplace;
	bool flush_stencil_inplace;
	bool copy_depth, copy_stencil;
	unsigned copy_sample;
	unsigned log_samples;
	unsigned db_shader_control;
	bool htile_clear;
	uint8_t ps_conservative_z;
};

/* Derived colorbuffer state combined from blend/framebuffer/PS outputs. */
struct r600_cb_misc_state {
	struct r600_atom atom;
	unsigned cb_color_control; /* this comes from blend state */
	unsigned blend_colormask; /* 8*4 bits for 8 RGBA colorbuffers */
	unsigned nr_cbufs;
	unsigned bound_cbufs_target_mask;
	unsigned nr_ps_color_outputs;
	unsigned ps_color_export_mask;
	unsigned image_rat_enabled_mask;
	unsigned buffer_rat_enabled_mask;
	bool multiwrite;
	bool dual_src_blend;
};

/* Clipping state derived from both the rasterizer CSO and the bound VS. */
struct r600_clip_misc_state {
	struct r600_atom atom;
	unsigned pa_cl_clip_cntl; /* from rasterizer */
	unsigned pa_cl_vs_out_cntl; /* from vertex shader */
	unsigned clip_plane_enable; /* from rasterizer */
	unsigned cc_dist_mask; /* from vertex shader */
	unsigned clip_dist_write; /* from vertex shader */
	unsigned cull_dist_write; /* from vertex shader */
	boolean clip_disable; /* from vertex shader */
	boolean vs_out_viewport; /* from vertex shader */
};

/* Alpha-test register state derived from the DSA CSO and framebuffer. */
struct r600_alphatest_state {
	struct r600_atom atom;
	unsigned sx_alpha_test_control; /* this comes from dsa state */
	unsigned sx_alpha_ref; /* this comes from dsa state */
	bool bypass;
	bool cb0_export_16bpc; /* from set_framebuffer_state */
};

/* VGT (primitive assembly) register state. */
struct r600_vgt_state {
	struct r600_atom atom;
	uint32_t vgt_multi_prim_ib_reset_en;
	uint32_t vgt_multi_prim_ib_reset_indx;
	uint32_t vgt_indx_offset;
	bool last_draw_was_indirect;
};

/* Blend-color constant. */
struct r600_blend_color {
	struct r600_atom atom;
	struct pipe_blend_color state;
};

/* User clip planes. */
struct r600_clip_state {
	struct r600_atom atom;
	struct pipe_clip_state state;
};

/* Currently bound compute shader. */
struct r600_cs_shader_state {
	struct r600_atom atom;
	unsigned kernel_index;
	unsigned pc;
	struct r600_pipe_compute *shader;
};

/* Framebuffer binding plus derived per-FB flags. */
struct r600_framebuffer {
	struct r600_atom atom;
	struct pipe_framebuffer_state state;
	unsigned compressed_cb_mask;
	unsigned nr_samples;
	bool export_16bpc;
	bool cb0_is_integer;
	bool is_msaa_resolve;
	bool dual_src_blend;
	bool do_update_surf_dirtiness;
};

struct r600_sample_mask {
	struct r600_atom atom;
	uint16_t sample_mask; /* there are only 8 bits on EG, 16 bits on Cayman */
};

/* SQ GPR partitioning between shader stages. */
struct r600_config_state {
	struct r600_atom atom;
	unsigned sq_gpr_resource_mgmt_1;
	unsigned sq_gpr_resource_mgmt_2;
	unsigned sq_gpr_resource_mgmt_3;
	bool dyn_gpr_enabled;
};

/* Front/back stencil reference values and masks. */
struct r600_stencil_ref
{
	ubyte ref_value[2];
	ubyte valuemask[2];
	ubyte writemask[2];
};

struct r600_stencil_ref_state {
	struct r600_atom atom;
	struct r600_stencil_ref state;
	struct pipe_stencil_ref pipe_state;
};

/* Which HW shader stages are active (GS on/off etc.). */
struct r600_shader_stages_state {
	struct r600_atom atom;
	unsigned geom_enable;
};

/* ESGS/GSVS ring buffers used when a geometry shader is bound. */
struct r600_gs_rings_state {
	struct r600_atom atom;
	unsigned enable;
	struct pipe_constant_buffer esgs_ring;
	struct pipe_constant_buffer gsvs_ring;
};

/* This must start from 16.
 */
/* features */
#define DBG_NO_CP_DMA		(1 << 30)
/* shader backend */
#define DBG_NO_SB		(1 << 21)
#define DBG_SB_CS		(1 << 22)
#define DBG_SB_DRY_RUN		(1 << 23)
#define DBG_SB_STAT		(1 << 24)
#define DBG_SB_DUMP		(1 << 25)
#define DBG_SB_NO_FALLBACK	(1 << 26)
#define DBG_SB_DISASM		(1 << 27)
#define DBG_SB_SAFEMATH		(1 << 28)

/* Per-GPU (screen) object shared by all contexts. */
struct r600_screen {
	struct r600_common_screen b;
	bool has_msaa;
	bool has_compressed_msaa_texturing;
	bool has_atomics;

	/*for compute global memory binding, we allocate stuff here, instead of
	 * buffers.
	 * XXX: Not sure if this is the best place for global_pool. Also,
	 * it's not thread safe, so it won't work with multiple contexts. */
	struct compute_memory_pool *global_pool;
};

/* Sampler view plus the pre-computed texture resource words. */
struct r600_pipe_sampler_view {
	struct pipe_sampler_view base;
	struct list_head list;
	struct r600_resource *tex_resource;
	uint32_t tex_resource_words[8];
	bool skip_mip_address_reloc;
	bool is_stencil_sampler;
};

/* Rasterizer CSO: pre-built command buffer plus fields other atoms read. */
struct r600_rasterizer_state {
	struct r600_command_buffer buffer;
	boolean flatshade;
	boolean two_side;
	unsigned sprite_coord_enable;
	unsigned clip_plane_enable;
	unsigned pa_sc_line_stipple;
	unsigned pa_cl_clip_cntl;
	unsigned pa_su_sc_mode_cntl;
	float offset_units;
	float offset_scale;
	bool offset_enable;
	bool offset_units_unscaled;
	bool scissor_enable;
	bool multisample_enable;
	bool clip_halfz;
	bool rasterizer_discard;
};

/* Polygon-offset registers; depend on the bound ZS format. */
struct r600_poly_offset_state {
	struct r600_atom atom;
	enum pipe_format zs_format;
	float offset_units;
	float offset_scale;
	bool offset_units_unscaled;
};

/* Blend CSO: normal and blend-disabled command-buffer variants. */
struct r600_blend_state {
	struct r600_command_buffer buffer;
	struct r600_command_buffer buffer_no_blend;
	unsigned cb_target_mask;
	unsigned cb_color_control;
	unsigned cb_color_control_no_blend;
	bool dual_src_blend;
	bool alpha_to_one;
};

/* Depth/stencil/alpha CSO. */
struct r600_dsa_state {
	struct r600_command_buffer buffer;
	unsigned alpha_ref;
	ubyte valuemask[2];
	ubyte writemask[2];
	unsigned zwritemask;
	unsigned sx_alpha_test_control;
};

struct r600_pipe_shader;

/* One shader CSO; owns a list of compiled variants keyed by shader key. */
struct r600_pipe_shader_selector {
	struct r600_pipe_shader *current; /* most recently used variant */

	struct tgsi_token *tokens;
	struct pipe_stream_output_info so;
	struct tgsi_shader_info info;

	unsigned num_shaders; /* compiled variant count */

	enum pipe_shader_type type;

	/* geometry shader properties */
	enum pipe_prim_type gs_output_prim;
	unsigned gs_max_out_vertices;
	unsigned gs_num_invocations;

	/* TCS/VS */
	uint64_t lds_patch_outputs_written_mask;
	uint64_t lds_outputs_written_mask;
	unsigned nr_ps_max_color_exports;
};

/* Sampler CSO: pre-computed sampler words plus border-color data. */
struct r600_pipe_sampler_state {
	uint32_t tex_sampler_words[3];
	union pipe_color_union border_color;
	bool border_color_use;
	bool seamless_cube_map;
};

/* needed for blitter save */
#define NUM_TEX_UNITS 16

struct r600_seamless_cube_map {
	struct r600_atom atom;
	bool enabled;
};

/* Per-stage bound sampler views with dirty/compressed tracking masks. */
struct r600_samplerview_state {
	struct r600_atom atom;
	struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
	uint32_t enabled_mask;
	uint32_t dirty_mask;
	uint32_t compressed_depthtex_mask; /* which textures are depth */
	uint32_t compressed_colortex_mask;
	boolean dirty_buffer_constants;
};

/* Per-stage bound sampler states. */
struct r600_sampler_states {
	struct r600_atom atom;
	struct r600_pipe_sampler_state *states[NUM_TEX_UNITS];
	uint32_t enabled_mask;
	uint32_t dirty_mask;
	uint32_t has_bordercolor_mask; /* which states contain the border color */
};

struct r600_textures_info {
	struct r600_samplerview_state views;
	struct r600_sampler_states states;
	bool is_array_sampler[NUM_TEX_UNITS];
};

/* Driver-internal constant buffer contents for one shader stage. */
struct r600_shader_driver_constants_info {
	/* currently 128 bytes for UCP/samplepos + sampler buffer constants */
	uint32_t
	*constants;
	uint32_t alloc_size;
	bool texture_const_dirty;
	bool vs_ucp_dirty;
	bool ps_sample_pos_dirty;
	bool cs_block_grid_size_dirty;
	bool tcs_default_levels_dirty;
};

/* Bound constant buffers for one shader stage. */
struct r600_constbuf_state
{
	struct r600_atom atom;
	struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
	uint32_t enabled_mask;
	uint32_t dirty_mask;
};

/* Bound vertex buffers. */
struct r600_vertexbuf_state
{
	struct r600_atom atom;
	struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
	uint32_t enabled_mask; /* non-NULL buffers */
	uint32_t dirty_mask;
};

/* CSO (constant state object, in other words, immutable state). */
struct r600_cso_state
{
	struct r600_atom atom;
	void *cso; /* e.g. r600_blend_state */
	struct r600_command_buffer *cb;
};

/* Compiled vertex-fetch shader stored in a GPU buffer. */
struct r600_fetch_shader {
	struct r600_resource *buffer;
	unsigned offset;
};

/* Currently bound shader for one HW stage. */
struct r600_shader_state {
	struct r600_atom atom;
	struct r600_pipe_shader *shader;
};

struct r600_atomic_buffer_state {
	struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
};

/* Image/RAT binding: pre-computed CB and resource register words. */
struct r600_image_view {
	struct pipe_image_view base;
	uint32_t cb_color_base;
	uint32_t cb_color_pitch;
	uint32_t cb_color_slice;
	uint32_t cb_color_view;
	uint32_t cb_color_info;
	uint32_t cb_color_attrib;
	uint32_t cb_color_dim;
	uint32_t cb_color_fmask;
	uint32_t cb_color_fmask_slice;
	uint32_t immed_resource_words[8];
	uint32_t resource_words[8];
	bool skip_mip_address_reloc;
	uint32_t buf_size;
};

/* Set of bound shader images (or buffers reusing the same machinery). */
struct r600_image_state {
	struct r600_atom atom;
	uint32_t enabled_mask;
	uint32_t dirty_mask;
	uint32_t compressed_depthtex_mask;
	uint32_t compressed_colortex_mask;
	boolean dirty_buffer_constants;
	struct r600_image_view views[R600_MAX_IMAGES];
};

/* Used to spill shader temps */
struct r600_scratch_buffer {
	struct r600_resource *buffer;
	boolean dirty;
	unsigned size;
	unsigned item_size;
};

/* The r600/evergreen pipe context: all per-context GPU state lives here. */
struct r600_context {
	struct r600_common_context b;
	struct r600_screen *screen;
	struct blitter_context *blitter;
	struct u_suballocator *allocator_fetch_shader;

	/* Hardware info. */
	boolean has_vertex_cache;
	unsigned default_gprs[EG_NUM_HW_STAGES];
	unsigned current_gprs[EG_NUM_HW_STAGES];
	unsigned r6xx_num_clause_temp_gprs;

	/* Miscellaneous state objects. */
	void *custom_dsa_flush;
	void *custom_blend_resolve;
	void *custom_blend_decompress;
	void *custom_blend_fastclear;
	/* With rasterizer discard, there doesn't have to be a pixel shader.
	 * In that case, we bind this one: */
	void *dummy_pixel_shader;
	/* These dummy CMASK and FMASK buffers are used to get around the R6xx hardware
	 * bug where valid CMASK and FMASK are required to be present to avoid
	 * a hardlock in certain operations but aren't actually used
	 * for anything useful. */
	struct r600_resource *dummy_fmask;
	struct r600_resource *dummy_cmask;

	/* State binding slots are here. */
	struct r600_atom *atoms[R600_NUM_ATOMS];
	/* Dirty atom bitmask for fast tests */
	uint64_t dirty_atoms;
	/* States for CS initialization. */
	struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
	/** Compute specific registers initializations.  The start_cs_cmd atom
	 *  must be emitted before start_compute_cs_cmd. */
	struct r600_command_buffer start_compute_cs_cmd;
	/* Register states.
	 */
	struct r600_alphatest_state alphatest_state;
	struct r600_cso_state blend_state;
	struct r600_blend_color blend_color;
	struct r600_cb_misc_state cb_misc_state;
	struct r600_clip_misc_state clip_misc_state;
	struct r600_clip_state clip_state;
	struct r600_db_misc_state db_misc_state;
	struct r600_db_state db_state;
	struct r600_cso_state dsa_state;
	struct r600_framebuffer framebuffer;
	struct r600_poly_offset_state poly_offset_state;
	struct r600_cso_state rasterizer_state;
	struct r600_sample_mask sample_mask;
	struct r600_seamless_cube_map seamless_cube_map;
	struct r600_config_state config_state;
	struct r600_stencil_ref_state stencil_ref;
	struct r600_vgt_state vgt_state;
	struct r600_atomic_buffer_state atomic_buffer_state;
	/* only have images on fragment shader */
	struct r600_image_state fragment_images;
	struct r600_image_state compute_images;
	struct r600_image_state fragment_buffers;
	struct r600_image_state compute_buffers;
	/* Shaders and shader resources. */
	struct r600_cso_state vertex_fetch_shader;
	struct r600_shader_state hw_shader_stages[EG_NUM_HW_STAGES];
	struct r600_cs_shader_state cs_shader_state;
	struct r600_shader_stages_state shader_stages;
	struct r600_gs_rings_state gs_rings;
	struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES];
	struct r600_textures_info samplers[PIPE_SHADER_TYPES];

	struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES];

	/** Vertex buffers for fetch shaders */
	struct r600_vertexbuf_state vertex_buffer_state;
	/** Vertex buffers for compute shaders */
	struct r600_vertexbuf_state cs_vertex_buffer_state;

	/* Additional context states. */
	unsigned compute_cb_target_mask;
	struct r600_pipe_shader_selector *ps_shader;
	struct r600_pipe_shader_selector *vs_shader;
	struct r600_pipe_shader_selector *gs_shader;

	struct r600_pipe_shader_selector *tcs_shader;
	struct r600_pipe_shader_selector *tes_shader;

	/* Internal TCS used when the app binds TES without a TCS. */
	struct r600_pipe_shader_selector *fixed_func_tcs_shader;

	struct r600_rasterizer_state *rasterizer;
	bool alpha_to_one;
	bool force_blend_disable;
	bool gs_tri_strip_adj_fix;
	boolean dual_src_blend;
	unsigned zwritemask;
	unsigned ps_iter_samples;

	/* The list of all texture buffer objects in this context.
	 * This list is walked when a buffer is invalidated/reallocated and
	 * the GPU addresses are updated. */
	struct list_head texture_buffers;

	/* Last draw state (-1 = unset). */
	enum pipe_prim_type last_primitive_type; /* Last primitive type used in draw_vbo. */
	enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */
	enum pipe_prim_type last_rast_prim;
	unsigned last_start_instance;

	/* Shader-backend (sb) optimizer and ISA handles. */
	void *sb_context;
	struct r600_isa *isa;
	float sample_positions[4 * 16];
	float tess_state[8];
	uint32_t cs_block_grid_sizes[8]; /* 3 for grid + 1 pad, 3 for block + 1 pad*/
	struct r600_pipe_shader_selector *last_ls;
	struct r600_pipe_shader_selector *last_tcs;
	unsigned last_num_tcs_input_cp;
	unsigned lds_alloc;

	/* Per-HW-stage scratch (spill) buffers. */
	struct r600_scratch_buffer scratch_buffers[MAX2(R600_NUM_HW_STAGES, EG_NUM_HW_STAGES)];

	/* Debug state.
	 */
	bool is_debug;
	struct radeon_saved_cs last_gfx;
	struct r600_resource *last_trace_buf;
	struct r600_resource *trace_buf;
	unsigned trace_id;

	bool cmd_buf_is_compute;
	struct pipe_resource *append_fence;
	uint32_t append_fence_id;
};

/* Copy a pre-built command buffer verbatim into the CS. */
static inline void r600_emit_command_buffer(struct radeon_cmdbuf *cs,
					    struct r600_command_buffer *cb)
{
	assert(cs->current.cdw + cb->num_dw <= cs->current.max_dw);
	memcpy(cs->current.buf + cs->current.cdw, cb->buf, 4 * cb->num_dw);
	cs->current.cdw += cb->num_dw;
}

/* Set or clear one atom's bit in the context's dirty-atom mask. */
static inline void r600_set_atom_dirty(struct r600_context *rctx,
				       struct r600_atom *atom,
				       bool dirty)
{
	uint64_t mask;

	assert(atom->id != 0);
	assert(atom->id < sizeof(mask) * 8); /* id must fit in the 64-bit mask */
	mask = 1ull << atom->id;
	if (dirty)
		rctx->dirty_atoms |= mask;
	else
		rctx->dirty_atoms &= ~mask;
}

static inline void r600_mark_atom_dirty(struct r600_context *rctx,
					struct r600_atom *atom)
{
	r600_set_atom_dirty(rctx, atom, true);
}

/* Emit an atom's commands and clear its dirty bit. */
static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
	atom->emit(&rctx->b, atom);
	r600_set_atom_dirty(rctx, atom, false);
}

/* Bind a CSO; the atom is dirty only while a non-NULL CSO is bound. */
static inline void r600_set_cso_state(struct r600_context *rctx,
				      struct r600_cso_state *state, void *cso)
{
	state->cso = cso;
	r600_set_atom_dirty(rctx, &state->atom, cso != NULL);
}

/* Bind a CSO that carries a pre-built command buffer (may be NULL). */
static inline void r600_set_cso_state_with_cb(struct r600_context *rctx,
					      struct r600_cso_state *state, void *cso,
					      struct r600_command_buffer *cb)
{
	state->cb = cb;
	state->atom.num_dw = cb ? cb->num_dw : 0;
	r600_set_cso_state(rctx, state, cso);
}

/* compute_memory_pool.c */
struct compute_memory_pool;
void compute_memory_pool_delete(struct compute_memory_pool* pool);
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen *rscreen);

/* evergreen_state.c */
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
				     struct pipe_resource *texture,
				     const struct pipe_sampler_view *state,
				     unsigned width0, unsigned height0,
				     unsigned force_level);
void evergreen_init_common_regs(struct r600_context *ctx,
				struct r600_command_buffer *cb,
				enum chip_class ctx_chip_class,
				enum radeon_family ctx_family,
				int ctx_drm_minor);
void cayman_init_common_regs(struct r600_command_buffer *cb,
			     enum chip_class ctx_chip_class,
			     enum radeon_family ctx_family,
			     int ctx_drm_minor);

void evergreen_init_state_functions(struct r600_context *rctx);
void evergreen_init_atom_start_cs(struct r600_context *rctx);
void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
void *evergreen_create_resolve_blend(struct r600_context *rctx);
void *evergreen_create_decompress_blend(struct r600_context *rctx);
void *evergreen_create_fastclear_blend(struct r600_context *rctx);
boolean evergreen_is_format_supported(struct pipe_screen *screen,
				      enum pipe_format format,
				      enum
					      pipe_texture_target target,
				      unsigned sample_count,
				      unsigned storage_sample_count,
				      unsigned usage);
void evergreen_init_color_surface(struct r600_context *rctx,
				  struct r600_surface *surf);
void evergreen_init_color_surface_rat(struct r600_context *rctx,
				      struct r600_surface *surf);
void evergreen_update_db_shader_control(struct r600_context * rctx);
bool evergreen_adjust_gprs(struct r600_context *rctx);
void evergreen_setup_scratch_buffers(struct r600_context *rctx);
uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a,
				      unsigned nr_cbufs);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_context *rctx);
void r600_decompress_depth_textures(struct r600_context *rctx,
				    struct r600_samplerview_state *textures);
void r600_decompress_depth_images(struct r600_context *rctx,
				  struct r600_image_state *images);
void r600_decompress_color_textures(struct r600_context *rctx,
				    struct r600_samplerview_state *textures);
void r600_decompress_color_images(struct r600_context *rctx,
				  struct r600_image_state *images);
void r600_resource_copy_region(struct pipe_context *ctx,
			       struct pipe_resource *dst,
			       unsigned dst_level,
			       unsigned dstx, unsigned dsty, unsigned dstz,
			       struct pipe_resource *src,
			       unsigned src_level,
			       const struct pipe_box *src_box);

/* r600_shader.c */
int r600_pipe_shader_create(struct pipe_context *ctx,
			    struct r600_pipe_shader *shader,
			    union r600_shader_key key);

void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);

/* r600_state.c */
struct pipe_sampler_view *
r600_create_sampler_view_custom(struct pipe_context *ctx,
				struct pipe_resource *texture,
				const struct pipe_sampler_view *state,
				unsigned width_first_level, unsigned height_first_level);
void r600_init_state_functions(struct r600_context *rctx);
void r600_init_atom_start_cs(struct r600_context *rctx);
void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *r600_create_db_flush_dsa(struct r600_context *rctx);
void *r600_create_resolve_blend(struct r600_context *rctx);
void *r700_create_resolve_blend(struct r600_context *rctx);
void *r600_create_decompress_blend(struct r600_context *rctx);
bool r600_adjust_gprs(struct r600_context *rctx);
boolean r600_is_format_supported(struct pipe_screen *screen,
				 enum pipe_format format,
				 enum pipe_texture_target target,
				 unsigned sample_count,
				 unsigned storage_sample_count,
				 unsigned usage);
void r600_update_db_shader_control(struct r600_context * rctx);
void r600_setup_scratch_buffers(struct r600_context *rctx);

/* r600_hw_context.c */
void r600_context_gfx_flush(void *context, unsigned flags,
			    struct pipe_fence_handle **fence);
void r600_begin_new_cs(struct r600_context *ctx);
void r600_flush_emit(struct r600_context *ctx);
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics);
void r600_emit_pfp_sync_me(struct r600_context *rctx);
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
			     struct pipe_resource *dst, uint64_t dst_offset,
			     struct pipe_resource *src, uint64_t src_offset,
			     unsigned size);
void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
				   struct pipe_resource *dst, uint64_t offset,
				   unsigned size, uint32_t clear_value,
				   enum r600_coherency coher);
void r600_dma_copy_buffer(struct r600_context *rctx,
			  struct pipe_resource *dst,
			  struct pipe_resource *src,
			  uint64_t dst_offset,
			  uint64_t src_offset,
			  uint64_t size);

/*
 * evergreen_hw_context.c
 */
void evergreen_dma_copy_buffer(struct r600_context *rctx,
			       struct pipe_resource *dst,
			       struct pipe_resource *src,
			       uint64_t dst_offset,
			       uint64_t src_offset,
			       uint64_t size);
void evergreen_setup_tess_constants(struct r600_context *rctx,
				    const struct pipe_draw_info *info,
				    unsigned *num_patches);
uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
				    const struct pipe_draw_info *info,
				    unsigned num_patches);
void evergreen_set_ls_hs_config(struct r600_context *rctx,
				struct radeon_cmdbuf *cs,
				uint32_t ls_hs_config);
void evergreen_set_lds_alloc(struct r600_context *rctx,
			     struct radeon_cmdbuf *cs,
			     uint32_t lds_alloc);

/* r600_state_common.c */
void r600_init_common_state_functions(struct r600_context *rctx);
void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a);
void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id);
void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
		    void (*emit)(struct r600_context *ctx, struct r600_atom *state),
		    unsigned num_dw);
void r600_vertex_buffers_dirty(struct r600_context *rctx);
void r600_sampler_views_dirty(struct r600_context *rctx,
			      struct r600_samplerview_state *state);
void r600_sampler_states_dirty(struct r600_context *rctx,
			       struct r600_sampler_states *state);
void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
	struct r600_pipe_shader *shader, struct r600_scratch_buffer *scratch,
	unsigned ring_base_reg, unsigned item_size_reg, unsigned ring_size_reg);
uint32_t r600_translate_stencil_op(int s_op);
uint32_t r600_translate_fill(uint32_t func);
unsigned r600_tex_wrap(unsigned wrap);
unsigned r600_tex_mipfilter(unsigned filter);
unsigned r600_tex_compare(unsigned compare);
bool sampler_state_needs_border_color(const struct pipe_sampler_state *state);
unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
				   const unsigned char *swizzle_view,
				   boolean vtx);
uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
				  const unsigned char *swizzle_view,
				  uint32_t *word4_p, uint32_t *yuv_format_p,
				  bool do_endian_swap);
uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format,
				    bool do_endian_swap);
uint32_t r600_colorformat_endian_swap(uint32_t colorformat, bool do_endian_swap);

/* r600_uvd.c */
struct pipe_video_codec *r600_uvd_create_decoder(struct pipe_context *context,
						 const struct pipe_video_codec *decoder);

struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
						   const struct pipe_video_buffer *tmpl);

/*
 * Helpers for building command buffers
 */

#define PKT3_SET_CONFIG_REG	0x68
#define PKT3_SET_CONTEXT_REG	0x69
#define PKT3_SET_CTL_CONST      0x6F
#define PKT3_SET_LOOP_CONST                    0x6C

#define R600_CONFIG_REG_OFFSET	0x08000
#define R600_CONTEXT_REG_OFFSET 0x28000
#define R600_CTL_CONST_OFFSET   0x3CFF0
#define R600_LOOP_CONST_OFFSET                 0X0003E200
#define EG_LOOP_CONST_OFFSET               0x0003A200

/* PM4 packet header field encoders. */
#define PKT_TYPE_S(x)                   (((unsigned)(x) & 0x3) << 30)
#define PKT_COUNT_S(x)                  (((unsigned)(x) & 0x3FFF) << 16)
#define PKT3_IT_OPCODE_S(x)             (((unsigned)(x) & 0xFF) << 8)
#define PKT3_PREDICATE(x)               (((x) >> 0) & 0x1)
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))

#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002

/*Evergreen Compute packet3*/
#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)

/* Append one dword.  NOTE(review): unlike r600_store_array/_reg_seq below,
 * this has no capacity assert; callers rely on a preceding *_seq helper
 * having checked the space. */
static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value)
{
	cb->buf[cb->num_dw++] = value;
}

/* Append `num` dwords from `ptr`. */
static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
{
	assert(cb->num_dw+num <= cb->max_num_dw);
	memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0]));
	cb->num_dw += num;
}

/* Begin a SET_CONFIG_REG packet for `num` consecutive registers at `reg`. */
static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
	assert(reg < R600_CONTEXT_REG_OFFSET);
	assert(cb->num_dw+2+num <= cb->max_num_dw);
	cb->buf[cb->num_dw++] = PKT3(PKT3_SET_CONFIG_REG, num, 0);
	cb->buf[cb->num_dw++] = (reg - R600_CONFIG_REG_OFFSET) >> 2;
}

/**
 * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
 * shaders.
 */
static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
	assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET);
	assert(cb->num_dw+2+num <= cb->max_num_dw);
	cb->buf[cb->num_dw++] = PKT3(PKT3_SET_CONTEXT_REG, num, 0) | cb->pkt_flags;
	cb->buf[cb->num_dw++] = (reg - R600_CONTEXT_REG_OFFSET) >> 2;
}

/**
 * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
 * shaders.
 */
static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
	assert(reg >= R600_CTL_CONST_OFFSET);
	assert(cb->num_dw+2+num <= cb->max_num_dw);
	cb->buf[cb->num_dw++] = PKT3(PKT3_SET_CTL_CONST, num, 0) | cb->pkt_flags;
	cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}

/* R600 loop-const variant; no pkt_flags (R600 has no compute mode bit here). */
static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
	assert(reg >= R600_LOOP_CONST_OFFSET);
	assert(cb->num_dw+2+num <= cb->max_num_dw);
	cb->buf[cb->num_dw++] = PKT3(PKT3_SET_LOOP_CONST, num, 0);
	cb->buf[cb->num_dw++] = (reg - R600_LOOP_CONST_OFFSET) >> 2;
}

/**
 * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
 * shaders.
 */
static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
	assert(reg >= EG_LOOP_CONST_OFFSET);
	assert(cb->num_dw+2+num <= cb->max_num_dw);
	cb->buf[cb->num_dw++] = PKT3(PKT3_SET_LOOP_CONST, num, 0) | cb->pkt_flags;
	cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2;
}

/* Single-register convenience wrappers around the *_seq helpers. */
static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
	r600_store_config_reg_seq(cb, reg, 1);
	r600_store_value(cb, value);
}

static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
	r600_store_context_reg_seq(cb, reg, 1);
	r600_store_value(cb, value);
}

static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
	r600_store_ctl_const_seq(cb, reg, 1);
	r600_store_value(cb, value);
}

static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
	r600_store_loop_const_seq(cb, reg, 1);
	r600_store_value(cb, value);
}

static inline void eg_store_loop_const(struct
r600_command_buffer *cb, unsigned reg, unsigned value) 975{ 976 eg_store_loop_const_seq(cb, reg, 1); 977 r600_store_value(cb, value); 978} 979 980void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw); 981void r600_release_command_buffer(struct r600_command_buffer *cb); 982 983static inline void radeon_compute_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) 984{ 985 radeon_set_context_reg_seq(cs, reg, num); 986 /* Set the compute bit on the packet header */ 987 cs->current.buf[cs->current.cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE; 988} 989 990static inline void radeon_set_ctl_const_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) 991{ 992 assert(reg >= R600_CTL_CONST_OFFSET); 993 assert(cs->current.cdw + 2 + num <= cs->current.max_dw); 994 radeon_emit(cs, PKT3(PKT3_SET_CTL_CONST, num, 0)); 995 radeon_emit(cs, (reg - R600_CTL_CONST_OFFSET) >> 2); 996} 997 998static inline void radeon_compute_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) 999{ 1000 radeon_compute_set_context_reg_seq(cs, reg, 1); 1001 radeon_emit(cs, value); 1002} 1003 1004static inline void radeon_set_context_reg_flag(struct radeon_cmdbuf *cs, unsigned reg, unsigned value, unsigned flag) 1005{ 1006 if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) { 1007 radeon_compute_set_context_reg(cs, reg, value); 1008 } else { 1009 radeon_set_context_reg(cs, reg, value); 1010 } 1011} 1012 1013static inline void radeon_set_ctl_const(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) 1014{ 1015 radeon_set_ctl_const_seq(cs, reg, 1); 1016 radeon_emit(cs, value); 1017} 1018 1019/* 1020 * common helpers 1021 */ 1022 1023/* 12.4 fixed-point */ 1024static inline unsigned r600_pack_float_12p4(float x) 1025{ 1026 return x <= 0 ? 0 : 1027 x >= 4096 ? 
0xffff : x * 16; 1028} 1029 1030static inline unsigned r600_get_flush_flags(enum r600_coherency coher) 1031{ 1032 switch (coher) { 1033 default: 1034 case R600_COHERENCY_NONE: 1035 return 0; 1036 case R600_COHERENCY_SHADER: 1037 return R600_CONTEXT_INV_CONST_CACHE | 1038 R600_CONTEXT_INV_VERTEX_CACHE | 1039 R600_CONTEXT_INV_TEX_CACHE | 1040 R600_CONTEXT_STREAMOUT_FLUSH; 1041 case R600_COHERENCY_CB_META: 1042 return R600_CONTEXT_FLUSH_AND_INV_CB | 1043 R600_CONTEXT_FLUSH_AND_INV_CB_META; 1044 } 1045} 1046 1047#define V_028A6C_OUTPRIM_TYPE_POINTLIST 0 1048#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1 1049#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2 1050 1051unsigned r600_conv_prim_to_gs_out(unsigned mode); 1052 1053void eg_trace_emit(struct r600_context *rctx); 1054void eg_dump_debug_state(struct pipe_context *ctx, FILE *f, 1055 unsigned flags); 1056 1057struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx, 1058 const struct tgsi_token *tokens, 1059 unsigned pipe_shader_type); 1060int r600_shader_select(struct pipe_context *ctx, 1061 struct r600_pipe_shader_selector* sel, 1062 bool *dirty); 1063 1064void r600_delete_shader_selector(struct pipe_context *ctx, 1065 struct r600_pipe_shader_selector *sel); 1066 1067struct r600_shader_atomic; 1068void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, 1069 struct r600_pipe_shader *cs_shader, 1070 struct r600_shader_atomic *combined_atomics, 1071 uint8_t *atomic_used_mask_p); 1072void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, 1073 bool is_compute, 1074 struct r600_shader_atomic *combined_atomics, 1075 uint8_t atomic_used_mask); 1076void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, 1077 bool is_compute, 1078 struct r600_shader_atomic *combined_atomics, 1079 uint8_t *atomic_used_mask_p); 1080void r600_update_compressed_resource_state(struct r600_context *rctx, bool compute_only); 1081 1082void eg_setup_buffer_constants(struct r600_context 
*rctx, int shader_type); 1083void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_only); 1084#endif 1085