1af69d88dSmrg/* 2af69d88dSmrg * Copyright 2012 Advanced Micro Devices, Inc. 301e04c3fSmrg * All Rights Reserved. 4af69d88dSmrg * 5af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 6af69d88dSmrg * copy of this software and associated documentation files (the "Software"), 7af69d88dSmrg * to deal in the Software without restriction, including without limitation 8af69d88dSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub 9af69d88dSmrg * license, and/or sell copies of the Software, and to permit persons to whom 10af69d88dSmrg * the Software is furnished to do so, subject to the following conditions: 11af69d88dSmrg * 12af69d88dSmrg * The above copyright notice and this permission notice (including the next 13af69d88dSmrg * paragraph) shall be included in all copies or substantial portions of the 14af69d88dSmrg * Software. 15af69d88dSmrg * 16af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17af69d88dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18af69d88dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19af69d88dSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20af69d88dSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21af69d88dSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22af69d88dSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 
23af69d88dSmrg */ 24af69d88dSmrg 25af69d88dSmrg#ifndef SI_STATE_H 26af69d88dSmrg#define SI_STATE_H 27af69d88dSmrg 2801e04c3fSmrg#include "pipebuffer/pb_slab.h" 297ec681f3Smrg#include "si_pm4.h" 3001e04c3fSmrg#include "util/u_blitter.h" 3101e04c3fSmrg 327ec681f3Smrg#ifdef __cplusplus 337ec681f3Smrgextern "C" { 347ec681f3Smrg#endif 357ec681f3Smrg 367ec681f3Smrg#define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL + 1) 377ec681f3Smrg#define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE + 1) 3801e04c3fSmrg 397ec681f3Smrg#define SI_NUM_VERTEX_BUFFERS SI_MAX_ATTRIBS 407ec681f3Smrg#define SI_NUM_SAMPLERS 32 /* OpenGL textures units per shader */ 417ec681f3Smrg#define SI_NUM_CONST_BUFFERS 16 427ec681f3Smrg#define SI_NUM_IMAGES 16 437ec681f3Smrg#define SI_NUM_IMAGE_SLOTS (SI_NUM_IMAGES * 2) /* the second half are FMASK slots */ 447ec681f3Smrg#define SI_NUM_SHADER_BUFFERS 32 45af69d88dSmrg 46af69d88dSmrgstruct si_screen; 4701e04c3fSmrgstruct si_shader; 487ec681f3Smrgstruct si_shader_ctx_state; 4901e04c3fSmrgstruct si_shader_selector; 5001e04c3fSmrgstruct si_texture; 5101e04c3fSmrgstruct si_qbo_state; 52af69d88dSmrg 53af69d88dSmrgstruct si_state_blend { 547ec681f3Smrg struct si_pm4_state pm4; 557ec681f3Smrg uint32_t cb_target_mask; 567ec681f3Smrg /* Set 0xf or 0x0 (4 bits) per render target if the following is 577ec681f3Smrg * true. ANDed with spi_shader_col_format. 
587ec681f3Smrg */ 597ec681f3Smrg unsigned cb_target_enabled_4bit; 607ec681f3Smrg unsigned blend_enable_4bit; 617ec681f3Smrg unsigned need_src_alpha_4bit; 627ec681f3Smrg unsigned commutative_4bit; 637ec681f3Smrg unsigned dcc_msaa_corruption_4bit; 647ec681f3Smrg bool alpha_to_coverage : 1; 657ec681f3Smrg bool alpha_to_one : 1; 667ec681f3Smrg bool dual_src_blend : 1; 677ec681f3Smrg bool logicop_enable : 1; 687ec681f3Smrg bool allows_noop_optimization : 1; 69af69d88dSmrg}; 70af69d88dSmrg 7101e04c3fSmrgstruct si_state_rasterizer { 727ec681f3Smrg struct si_pm4_state pm4; 737ec681f3Smrg /* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */ 747ec681f3Smrg struct si_pm4_state *pm4_poly_offset; 757ec681f3Smrg unsigned pa_sc_line_stipple; 767ec681f3Smrg unsigned pa_cl_clip_cntl; 777ec681f3Smrg float line_width; 787ec681f3Smrg float max_point_size; 797ec681f3Smrg unsigned ngg_cull_flags : 8; 807ec681f3Smrg unsigned ngg_cull_flags_y_inverted : 8; 817ec681f3Smrg unsigned sprite_coord_enable : 8; 827ec681f3Smrg unsigned clip_plane_enable : 8; 837ec681f3Smrg unsigned half_pixel_center : 1; 847ec681f3Smrg unsigned flatshade : 1; 857ec681f3Smrg unsigned flatshade_first : 1; 867ec681f3Smrg unsigned two_side : 1; 877ec681f3Smrg unsigned multisample_enable : 1; 887ec681f3Smrg unsigned force_persample_interp : 1; 897ec681f3Smrg unsigned line_stipple_enable : 1; 907ec681f3Smrg unsigned poly_stipple_enable : 1; 917ec681f3Smrg unsigned line_smooth : 1; 927ec681f3Smrg unsigned poly_smooth : 1; 937ec681f3Smrg unsigned uses_poly_offset : 1; 947ec681f3Smrg unsigned clamp_fragment_color : 1; 957ec681f3Smrg unsigned clamp_vertex_color : 1; 967ec681f3Smrg unsigned rasterizer_discard : 1; 977ec681f3Smrg unsigned scissor_enable : 1; 987ec681f3Smrg unsigned clip_halfz : 1; 997ec681f3Smrg unsigned polygon_mode_is_lines : 1; 1007ec681f3Smrg unsigned polygon_mode_is_points : 1; 101af69d88dSmrg}; 102af69d88dSmrg 10301e04c3fSmrgstruct si_dsa_stencil_ref_part { 1047ec681f3Smrg uint8_t 
valuemask[2]; 1057ec681f3Smrg uint8_t writemask[2]; 106af69d88dSmrg}; 107af69d88dSmrg 10801e04c3fSmrgstruct si_dsa_order_invariance { 1097ec681f3Smrg /** Whether the final result in Z/S buffers is guaranteed to be 1107ec681f3Smrg * invariant under changes to the order in which fragments arrive. */ 1117ec681f3Smrg bool zs : 1; 1127ec681f3Smrg 1137ec681f3Smrg /** Whether the set of fragments that pass the combined Z/S test is 1147ec681f3Smrg * guaranteed to be invariant under changes to the order in which 1157ec681f3Smrg * fragments arrive. */ 1167ec681f3Smrg bool pass_set : 1; 1177ec681f3Smrg 1187ec681f3Smrg /** Whether the last fragment that passes the combined Z/S test at each 1197ec681f3Smrg * sample is guaranteed to be invariant under changes to the order in 1207ec681f3Smrg * which fragments arrive. */ 1217ec681f3Smrg bool pass_last : 1; 122af69d88dSmrg}; 123af69d88dSmrg 124af69d88dSmrgstruct si_state_dsa { 1257ec681f3Smrg struct si_pm4_state pm4; 1267ec681f3Smrg struct si_dsa_stencil_ref_part stencil_ref; 1277ec681f3Smrg 1287ec681f3Smrg /* 0 = without stencil buffer, 1 = when both Z and S buffers are present */ 1297ec681f3Smrg struct si_dsa_order_invariance order_invariance[2]; 1307ec681f3Smrg 1317ec681f3Smrg ubyte alpha_func : 3; 1327ec681f3Smrg bool depth_enabled : 1; 1337ec681f3Smrg bool depth_write_enabled : 1; 1347ec681f3Smrg bool stencil_enabled : 1; 1357ec681f3Smrg bool stencil_write_enabled : 1; 1367ec681f3Smrg bool db_can_write : 1; 13701e04c3fSmrg}; 13801e04c3fSmrg 13901e04c3fSmrgstruct si_stencil_ref { 1407ec681f3Smrg struct pipe_stencil_ref state; 1417ec681f3Smrg struct si_dsa_stencil_ref_part dsa_part; 142af69d88dSmrg}; 143af69d88dSmrg 1447ec681f3Smrgstruct si_vertex_elements { 1457ec681f3Smrg struct si_resource *instance_divisor_factor_buffer; 1467ec681f3Smrg uint32_t rsrc_word3[SI_MAX_ATTRIBS]; 1477ec681f3Smrg uint16_t src_offset[SI_MAX_ATTRIBS]; 1487ec681f3Smrg uint8_t fix_fetch[SI_MAX_ATTRIBS]; 1497ec681f3Smrg uint8_t 
format_size[SI_MAX_ATTRIBS]; 1507ec681f3Smrg uint8_t vertex_buffer_index[SI_MAX_ATTRIBS]; 1517ec681f3Smrg 1527ec681f3Smrg /* Bitmask of elements that always need a fixup to be applied. */ 1537ec681f3Smrg uint16_t fix_fetch_always; 1547ec681f3Smrg 1557ec681f3Smrg /* Bitmask of elements whose fetch should always be opencoded. */ 1567ec681f3Smrg uint16_t fix_fetch_opencode; 1577ec681f3Smrg 1587ec681f3Smrg /* Bitmask of elements which need to be opencoded if the vertex buffer 1597ec681f3Smrg * is unaligned. */ 1607ec681f3Smrg uint16_t fix_fetch_unaligned; 1617ec681f3Smrg 1627ec681f3Smrg /* For elements in fix_fetch_unaligned: whether the effective 1637ec681f3Smrg * element load size as seen by the hardware is a dword (as opposed 1647ec681f3Smrg * to a short). 1657ec681f3Smrg */ 1667ec681f3Smrg uint16_t hw_load_is_dword; 1677ec681f3Smrg 1687ec681f3Smrg /* Bitmask of vertex buffers requiring alignment check */ 1697ec681f3Smrg uint16_t vb_alignment_check_mask; 1707ec681f3Smrg 1717ec681f3Smrg uint8_t count; 1727ec681f3Smrg 1737ec681f3Smrg uint16_t first_vb_use_mask; 1747ec681f3Smrg /* Vertex buffer descriptor list size aligned for optimal prefetch. 
*/ 1757ec681f3Smrg uint16_t vb_desc_list_alloc_size; 1767ec681f3Smrg uint16_t instance_divisor_is_one; /* bitmask of inputs */ 1777ec681f3Smrg uint16_t instance_divisor_is_fetched; /* bitmask of inputs */ 178af69d88dSmrg}; 179af69d88dSmrg 180af69d88dSmrgunion si_state { 1817ec681f3Smrg struct si_state_named { 1827ec681f3Smrg struct si_state_blend *blend; 1837ec681f3Smrg struct si_state_rasterizer *rasterizer; 1847ec681f3Smrg struct si_state_dsa *dsa; 1857ec681f3Smrg struct si_pm4_state *poly_offset; 1867ec681f3Smrg struct si_shader *ls; 1877ec681f3Smrg struct si_shader *hs; 1887ec681f3Smrg struct si_shader *es; 1897ec681f3Smrg struct si_shader *gs; 1907ec681f3Smrg struct si_pm4_state *vgt_shader_config; 1917ec681f3Smrg struct si_shader *vs; 1927ec681f3Smrg struct si_shader *ps; 1937ec681f3Smrg } named; 1947ec681f3Smrg struct si_pm4_state *array[sizeof(struct si_state_named) / sizeof(struct si_pm4_state *)]; 195af69d88dSmrg}; 196af69d88dSmrg 1977ec681f3Smrg#define SI_STATE_IDX(name) (offsetof(union si_state, named.name) / sizeof(struct si_pm4_state *)) 19801e04c3fSmrg#define SI_STATE_BIT(name) (1 << SI_STATE_IDX(name)) 1997ec681f3Smrg#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *)) 200af69d88dSmrg 20101e04c3fSmrgstatic inline unsigned si_states_that_always_roll_context(void) 20201e04c3fSmrg{ 2037ec681f3Smrg return (SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) | 2047ec681f3Smrg SI_STATE_BIT(poly_offset) | SI_STATE_BIT(vgt_shader_config)); 20501e04c3fSmrg} 20601e04c3fSmrg 20701e04c3fSmrgunion si_state_atoms { 2087ec681f3Smrg struct si_atoms_s { 2097ec681f3Smrg /* The order matters. 
*/ 2107ec681f3Smrg struct si_atom render_cond; 2117ec681f3Smrg struct si_atom streamout_begin; 2127ec681f3Smrg struct si_atom streamout_enable; /* must be after streamout_begin */ 2137ec681f3Smrg struct si_atom framebuffer; 2147ec681f3Smrg struct si_atom msaa_sample_locs; 2157ec681f3Smrg struct si_atom db_render_state; 2167ec681f3Smrg struct si_atom dpbb_state; 2177ec681f3Smrg struct si_atom msaa_config; 2187ec681f3Smrg struct si_atom sample_mask; 2197ec681f3Smrg struct si_atom cb_render_state; 2207ec681f3Smrg struct si_atom blend_color; 2217ec681f3Smrg struct si_atom clip_regs; 2227ec681f3Smrg struct si_atom clip_state; 2237ec681f3Smrg struct si_atom shader_pointers; 2247ec681f3Smrg struct si_atom guardband; 2257ec681f3Smrg struct si_atom scissors; 2267ec681f3Smrg struct si_atom viewports; 2277ec681f3Smrg struct si_atom stencil_ref; 2287ec681f3Smrg struct si_atom spi_map; 2297ec681f3Smrg struct si_atom scratch_state; 2307ec681f3Smrg struct si_atom window_rectangles; 2317ec681f3Smrg struct si_atom shader_query; 2327ec681f3Smrg struct si_atom ngg_cull_state; 2337ec681f3Smrg } s; 2347ec681f3Smrg struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)]; 23501e04c3fSmrg}; 236af69d88dSmrg 2377ec681f3Smrg#define SI_ATOM_BIT(name) (1 << (offsetof(union si_state_atoms, s.name) / sizeof(struct si_atom))) 2387ec681f3Smrg#define SI_NUM_ATOMS (sizeof(union si_state_atoms) / sizeof(struct si_atom)) 239af69d88dSmrg 24001e04c3fSmrgstatic inline unsigned si_atoms_that_always_roll_context(void) 24101e04c3fSmrg{ 2427ec681f3Smrg return (SI_ATOM_BIT(streamout_begin) | SI_ATOM_BIT(streamout_enable) | SI_ATOM_BIT(framebuffer) | 2437ec681f3Smrg SI_ATOM_BIT(msaa_sample_locs) | SI_ATOM_BIT(sample_mask) | SI_ATOM_BIT(blend_color) | 2447ec681f3Smrg SI_ATOM_BIT(clip_state) | SI_ATOM_BIT(scissors) | SI_ATOM_BIT(viewports) | 2457ec681f3Smrg SI_ATOM_BIT(stencil_ref) | SI_ATOM_BIT(scratch_state) | SI_ATOM_BIT(window_rectangles)); 24601e04c3fSmrg} 24701e04c3fSmrg 
24801e04c3fSmrgstruct si_shader_data { 2497ec681f3Smrg uint32_t sh_base[SI_NUM_SHADERS]; 25001e04c3fSmrg}; 251af69d88dSmrg 25201e04c3fSmrg/* The list of registers whose emitted values are remembered by si_context. */ 2537ec681f3Smrgenum si_tracked_reg 2547ec681f3Smrg{ 2557ec681f3Smrg SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */ 2567ec681f3Smrg SI_TRACKED_DB_COUNT_CONTROL, 2577ec681f3Smrg 2587ec681f3Smrg SI_TRACKED_DB_RENDER_OVERRIDE2, 2597ec681f3Smrg SI_TRACKED_DB_SHADER_CONTROL, 2607ec681f3Smrg 2617ec681f3Smrg SI_TRACKED_CB_TARGET_MASK, 2627ec681f3Smrg SI_TRACKED_CB_DCC_CONTROL, 2637ec681f3Smrg 2647ec681f3Smrg SI_TRACKED_SX_PS_DOWNCONVERT, /* 3 consecutive registers */ 2657ec681f3Smrg SI_TRACKED_SX_BLEND_OPT_EPSILON, 2667ec681f3Smrg SI_TRACKED_SX_BLEND_OPT_CONTROL, 2677ec681f3Smrg 2687ec681f3Smrg SI_TRACKED_PA_SC_LINE_CNTL, /* 2 consecutive registers */ 2697ec681f3Smrg SI_TRACKED_PA_SC_AA_CONFIG, 270af69d88dSmrg 2717ec681f3Smrg SI_TRACKED_DB_EQAA, 2727ec681f3Smrg SI_TRACKED_PA_SC_MODE_CNTL_1, 273af69d88dSmrg 2747ec681f3Smrg SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, 2757ec681f3Smrg SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, 276af69d88dSmrg 2777ec681f3Smrg SI_TRACKED_PA_CL_VS_OUT_CNTL, 2787ec681f3Smrg SI_TRACKED_PA_CL_CLIP_CNTL, 279af69d88dSmrg 2807ec681f3Smrg SI_TRACKED_PA_SC_BINNER_CNTL_0, 281af69d88dSmrg 2827ec681f3Smrg SI_TRACKED_DB_VRS_OVERRIDE_CNTL, 28301e04c3fSmrg 2847ec681f3Smrg SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */ 2857ec681f3Smrg SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ, 2867ec681f3Smrg SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ, 2877ec681f3Smrg SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ, 28801e04c3fSmrg 2897ec681f3Smrg SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, 2907ec681f3Smrg SI_TRACKED_PA_SU_VTX_CNTL, 29101e04c3fSmrg 2927ec681f3Smrg SI_TRACKED_PA_SC_CLIPRECT_RULE, 29301e04c3fSmrg 2947ec681f3Smrg SI_TRACKED_PA_SC_LINE_STIPPLE, 29501e04c3fSmrg 2967ec681f3Smrg SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, 29701e04c3fSmrg 2987ec681f3Smrg 
SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 3 consecutive registers */ 2997ec681f3Smrg SI_TRACKED_VGT_GSVS_RING_OFFSET_2, 3007ec681f3Smrg SI_TRACKED_VGT_GSVS_RING_OFFSET_3, 30101e04c3fSmrg 3027ec681f3Smrg SI_TRACKED_VGT_GSVS_RING_ITEMSIZE, 3037ec681f3Smrg SI_TRACKED_VGT_GS_MAX_VERT_OUT, 30401e04c3fSmrg 3057ec681f3Smrg SI_TRACKED_VGT_GS_VERT_ITEMSIZE, /* 4 consecutive registers */ 3067ec681f3Smrg SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1, 3077ec681f3Smrg SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2, 3087ec681f3Smrg SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3, 30901e04c3fSmrg 3107ec681f3Smrg SI_TRACKED_VGT_GS_INSTANCE_CNT, 3117ec681f3Smrg SI_TRACKED_VGT_GS_ONCHIP_CNTL, 3127ec681f3Smrg SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP, 3137ec681f3Smrg SI_TRACKED_VGT_GS_MODE, 3147ec681f3Smrg SI_TRACKED_VGT_PRIMITIVEID_EN, 3157ec681f3Smrg SI_TRACKED_VGT_REUSE_OFF, 3167ec681f3Smrg SI_TRACKED_SPI_VS_OUT_CONFIG, 3177ec681f3Smrg SI_TRACKED_PA_CL_VTE_CNTL, 3187ec681f3Smrg SI_TRACKED_PA_CL_NGG_CNTL, 3197ec681f3Smrg SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, 3207ec681f3Smrg SI_TRACKED_GE_NGG_SUBGRP_CNTL, 32101e04c3fSmrg 3227ec681f3Smrg SI_TRACKED_SPI_SHADER_IDX_FORMAT, /* 2 consecutive registers */ 3237ec681f3Smrg SI_TRACKED_SPI_SHADER_POS_FORMAT, 32401e04c3fSmrg 3257ec681f3Smrg SI_TRACKED_SPI_PS_INPUT_ENA, /* 2 consecutive registers */ 3267ec681f3Smrg SI_TRACKED_SPI_PS_INPUT_ADDR, 32701e04c3fSmrg 3287ec681f3Smrg SI_TRACKED_SPI_BARYC_CNTL, 3297ec681f3Smrg SI_TRACKED_SPI_PS_IN_CONTROL, 33001e04c3fSmrg 3317ec681f3Smrg SI_TRACKED_SPI_SHADER_Z_FORMAT, /* 2 consecutive registers */ 3327ec681f3Smrg SI_TRACKED_SPI_SHADER_COL_FORMAT, 33301e04c3fSmrg 3347ec681f3Smrg SI_TRACKED_CB_SHADER_MASK, 3357ec681f3Smrg SI_TRACKED_VGT_TF_PARAM, 3367ec681f3Smrg SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, 33701e04c3fSmrg 3387ec681f3Smrg /* Non-context registers: */ 3397ec681f3Smrg SI_TRACKED_GE_PC_ALLOC, 3407ec681f3Smrg SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, 3417ec681f3Smrg SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, 34201e04c3fSmrg 3437ec681f3Smrg 
SI_NUM_TRACKED_REGS, 344af69d88dSmrg}; 345af69d88dSmrg 34601e04c3fSmrgstruct si_tracked_regs { 3477ec681f3Smrg uint64_t reg_saved; 3487ec681f3Smrg uint32_t reg_value[SI_NUM_TRACKED_REGS]; 3497ec681f3Smrg uint32_t spi_ps_input_cntl[32]; 350af69d88dSmrg}; 351af69d88dSmrg 35201e04c3fSmrg/* Private read-write buffer slots. */ 3537ec681f3Smrgenum 3547ec681f3Smrg{ 3557ec681f3Smrg SI_ES_RING_ESGS, 3567ec681f3Smrg SI_GS_RING_ESGS, 3577ec681f3Smrg 3587ec681f3Smrg SI_RING_GSVS, 3597ec681f3Smrg 3607ec681f3Smrg SI_VS_STREAMOUT_BUF0, 3617ec681f3Smrg SI_VS_STREAMOUT_BUF1, 3627ec681f3Smrg SI_VS_STREAMOUT_BUF2, 3637ec681f3Smrg SI_VS_STREAMOUT_BUF3, 3647ec681f3Smrg 3657ec681f3Smrg SI_HS_CONST_DEFAULT_TESS_LEVELS, 3667ec681f3Smrg SI_VS_CONST_INSTANCE_DIVISORS, 3677ec681f3Smrg SI_VS_CONST_CLIP_PLANES, 3687ec681f3Smrg SI_PS_CONST_POLY_STIPPLE, 3697ec681f3Smrg SI_PS_CONST_SAMPLE_POSITIONS, 3707ec681f3Smrg 3717ec681f3Smrg /* Image descriptor of color buffer 0 for KHR_blend_equation_advanced. */ 3727ec681f3Smrg SI_PS_IMAGE_COLORBUF0, 3737ec681f3Smrg SI_PS_IMAGE_COLORBUF0_HI, 3747ec681f3Smrg SI_PS_IMAGE_COLORBUF0_FMASK, 3757ec681f3Smrg SI_PS_IMAGE_COLORBUF0_FMASK_HI, 3767ec681f3Smrg 3777ec681f3Smrg GFX10_GS_QUERY_BUF, 3787ec681f3Smrg 3797ec681f3Smrg SI_NUM_INTERNAL_BINDINGS, 38001e04c3fSmrg}; 38101e04c3fSmrg 38201e04c3fSmrg/* Indices into sctx->descriptors, laid out so that gfx and compute pipelines 38301e04c3fSmrg * are contiguous: 38401e04c3fSmrg * 38501e04c3fSmrg * 0 - rw buffers 38601e04c3fSmrg * 1 - vertex const and shader buffers 38701e04c3fSmrg * 2 - vertex samplers and images 38801e04c3fSmrg * 3 - fragment const and shader buffer 38901e04c3fSmrg * ... 
39001e04c3fSmrg * 11 - compute const and shader buffers 39101e04c3fSmrg * 12 - compute samplers and images 39201e04c3fSmrg */ 3937ec681f3Smrgenum 3947ec681f3Smrg{ 3957ec681f3Smrg SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS, 3967ec681f3Smrg SI_SHADER_DESCS_SAMPLERS_AND_IMAGES, 3977ec681f3Smrg SI_NUM_SHADER_DESCS, 39801e04c3fSmrg}; 39901e04c3fSmrg 4007ec681f3Smrg#define SI_DESCS_INTERNAL 0 4017ec681f3Smrg#define SI_DESCS_FIRST_SHADER 1 4027ec681f3Smrg#define SI_DESCS_FIRST_COMPUTE (SI_DESCS_FIRST_SHADER + PIPE_SHADER_COMPUTE * SI_NUM_SHADER_DESCS) 4037ec681f3Smrg#define SI_NUM_DESCS (SI_DESCS_FIRST_SHADER + SI_NUM_SHADERS * SI_NUM_SHADER_DESCS) 4047ec681f3Smrg 4057ec681f3Smrg#define SI_DESCS_SHADER_MASK(name) \ 4067ec681f3Smrg u_bit_consecutive(SI_DESCS_FIRST_SHADER + PIPE_SHADER_##name * SI_NUM_SHADER_DESCS, \ 4077ec681f3Smrg SI_NUM_SHADER_DESCS) 40801e04c3fSmrg 4097ec681f3Smrgstatic inline unsigned si_const_and_shader_buffer_descriptors_idx(unsigned shader) 4107ec681f3Smrg{ 4117ec681f3Smrg return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 4127ec681f3Smrg SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS; 4137ec681f3Smrg} 4147ec681f3Smrg 4157ec681f3Smrgstatic inline unsigned si_sampler_and_image_descriptors_idx(unsigned shader) 4167ec681f3Smrg{ 4177ec681f3Smrg return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + 4187ec681f3Smrg SI_SHADER_DESCS_SAMPLERS_AND_IMAGES; 4197ec681f3Smrg} 42001e04c3fSmrg 42101e04c3fSmrg/* This represents descriptors in memory, such as buffer resources, 42201e04c3fSmrg * image resources, and sampler states. 42301e04c3fSmrg */ 42401e04c3fSmrgstruct si_descriptors { 4257ec681f3Smrg /* The list of descriptors in malloc'd memory. */ 4267ec681f3Smrg uint32_t *list; 4277ec681f3Smrg /* The list in mapped GPU memory. */ 4287ec681f3Smrg uint32_t *gpu_list; 4297ec681f3Smrg 4307ec681f3Smrg /* The buffer where the descriptors have been uploaded. 
*/ 4317ec681f3Smrg struct si_resource *buffer; 4327ec681f3Smrg uint64_t gpu_address; 4337ec681f3Smrg 4347ec681f3Smrg /* The maximum number of descriptors. */ 4357ec681f3Smrg uint32_t num_elements; 4367ec681f3Smrg 4377ec681f3Smrg /* Slots that are used by currently-bound shaders. 4387ec681f3Smrg * It determines which slots are uploaded. 4397ec681f3Smrg */ 4407ec681f3Smrg uint32_t first_active_slot; 4417ec681f3Smrg uint32_t num_active_slots; 4427ec681f3Smrg 4437ec681f3Smrg /* The SH register offset relative to USER_DATA*_0 where the pointer 4447ec681f3Smrg * to the descriptor array will be stored. */ 4457ec681f3Smrg short shader_userdata_offset; 4467ec681f3Smrg /* The size of one descriptor. */ 4477ec681f3Smrg ubyte element_dw_size; 4487ec681f3Smrg /* If there is only one slot enabled, bind it directly instead of 4497ec681f3Smrg * uploading descriptors. -1 if disabled. */ 4507ec681f3Smrg signed char slot_index_to_bind_directly; 451af69d88dSmrg}; 452af69d88dSmrg 453af69d88dSmrgstruct si_buffer_resources { 4547ec681f3Smrg struct pipe_resource **buffers; /* this has num_buffers elements */ 4557ec681f3Smrg unsigned *offsets; /* this has num_buffers elements */ 456af69d88dSmrg 4577ec681f3Smrg enum radeon_bo_priority priority : 6; 4587ec681f3Smrg enum radeon_bo_priority priority_constbuf : 6; 45901e04c3fSmrg 4607ec681f3Smrg /* The i-th bit is set if that element is enabled (non-NULL resource). 
*/ 4617ec681f3Smrg uint64_t enabled_mask; 4627ec681f3Smrg uint64_t writable_mask; 46301e04c3fSmrg}; 464af69d88dSmrg 4657ec681f3Smrg#define si_pm4_state_changed(sctx, member) \ 4667ec681f3Smrg ((sctx)->queued.named.member != (sctx)->emitted.named.member) 467af69d88dSmrg 4687ec681f3Smrg#define si_pm4_state_enabled_and_changed(sctx, member) \ 4697ec681f3Smrg ((sctx)->queued.named.member && si_pm4_state_changed(sctx, member)) 47001e04c3fSmrg 4717ec681f3Smrg#define si_pm4_bind_state(sctx, member, value) \ 4727ec681f3Smrg do { \ 4737ec681f3Smrg (sctx)->queued.named.member = (value); \ 4747ec681f3Smrg if (value && value != (sctx)->emitted.named.member) \ 4757ec681f3Smrg (sctx)->dirty_states |= SI_STATE_BIT(member); \ 4767ec681f3Smrg else \ 4777ec681f3Smrg (sctx)->dirty_states &= ~SI_STATE_BIT(member); \ 4787ec681f3Smrg } while (0) 479af69d88dSmrg 480af69d88dSmrg/* si_descriptors.c */ 4817ec681f3Smrgvoid si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex, 4827ec681f3Smrg const struct legacy_surf_level *base_level_info, 4837ec681f3Smrg unsigned base_level, unsigned first_level, unsigned block_width, 4847ec681f3Smrg /* restrict decreases overhead of si_set_sampler_view_desc ~8x. 
*/ 4857ec681f3Smrg bool is_stencil, uint16_t access, uint32_t * restrict state); 48601e04c3fSmrgvoid si_update_ps_colorbuf0_slot(struct si_context *sctx); 4877ec681f3Smrgvoid si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader); 4887ec681f3Smrgvoid si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, 4897ec681f3Smrg struct pipe_constant_buffer *cbuf); 4907ec681f3Smrgvoid si_get_shader_buffers(struct si_context *sctx, enum pipe_shader_type shader, uint start_slot, 4917ec681f3Smrg uint count, struct pipe_shader_buffer *sbuf); 4927ec681f3Smrgvoid si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer, 4937ec681f3Smrg unsigned stride, unsigned num_records, bool add_tid, bool swizzle, 4947ec681f3Smrg unsigned element_size, unsigned index_stride, uint64_t offset); 495af69d88dSmrgvoid si_init_all_descriptors(struct si_context *sctx); 49601e04c3fSmrgbool si_upload_graphics_shader_descriptors(struct si_context *sctx); 49701e04c3fSmrgbool si_upload_compute_shader_descriptors(struct si_context *sctx); 498af69d88dSmrgvoid si_release_all_descriptors(struct si_context *sctx); 4999f464c52Smayavoid si_gfx_resources_add_all_to_bo_list(struct si_context *sctx); 5009f464c52Smayavoid si_compute_resources_add_all_to_bo_list(struct si_context *sctx); 5017ec681f3Smrgbool si_gfx_resources_check_encrypted(struct si_context *sctx); 5027ec681f3Smrgbool si_compute_resources_check_encrypted(struct si_context *sctx); 5037ec681f3Smrgvoid si_shader_pointers_mark_dirty(struct si_context *sctx); 5047ec681f3Smrgvoid si_add_all_descriptors_to_bo_list(struct si_context *sctx); 50501e04c3fSmrgvoid si_update_all_texture_descriptors(struct si_context *sctx); 50601e04c3fSmrgvoid si_shader_change_notify(struct si_context *sctx); 50701e04c3fSmrgvoid si_update_needs_color_decompress_masks(struct si_context *sctx); 50801e04c3fSmrgvoid si_emit_graphics_shader_pointers(struct si_context *sctx); 50901e04c3fSmrgvoid 
si_emit_compute_shader_pointers(struct si_context *sctx); 5107ec681f3Smrgvoid si_set_internal_const_buffer(struct si_context *sctx, uint slot, 5117ec681f3Smrg const struct pipe_constant_buffer *input); 5127ec681f3Smrgvoid si_set_internal_shader_buffer(struct si_context *sctx, uint slot, 5137ec681f3Smrg const struct pipe_shader_buffer *sbuffer); 51401e04c3fSmrgvoid si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, 5157ec681f3Smrg uint64_t new_active_mask); 5167ec681f3Smrgvoid si_set_active_descriptors_for_shader(struct si_context *sctx, struct si_shader_selector *sel); 5177ec681f3Smrgbool si_bindless_descriptor_can_reclaim_slab(void *priv, struct pb_slab_entry *entry); 5187ec681f3Smrgstruct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap, unsigned entry_size, 5197ec681f3Smrg unsigned group_index); 52001e04c3fSmrgvoid si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab); 5219f464c52Smayavoid si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf); 522af69d88dSmrg/* si_state.c */ 5237ec681f3Smrguint32_t si_translate_colorformat(enum chip_class chip_class, enum pipe_format format); 5249f464c52Smayavoid si_init_state_compute_functions(struct si_context *sctx); 525af69d88dSmrgvoid si_init_state_functions(struct si_context *sctx); 52601e04c3fSmrgvoid si_init_screen_state_functions(struct si_screen *sscreen); 5277ec681f3Smrgvoid si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing); 5287ec681f3Smrgvoid si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, 5297ec681f3Smrg enum pipe_format format, unsigned offset, unsigned size, 5307ec681f3Smrg uint32_t *state); 5317ec681f3Smrgstruct pipe_sampler_view *si_create_sampler_view_custom(struct pipe_context *ctx, 5327ec681f3Smrg struct pipe_resource *texture, 5337ec681f3Smrg const struct pipe_sampler_view *state, 5347ec681f3Smrg unsigned width0, unsigned height0, 5357ec681f3Smrg unsigned force_level); 
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
void si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex);
void si_update_ps_iter_samples(struct si_context *sctx);
void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st);
void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st);
void si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable);

/* Parameters describing an unsigned division as a multiply plus shifts;
 * returned by si_compute_fast_udiv_info32().
 */
struct si_fast_udiv_info32 {
   unsigned multiplier; /* the "magic number" multiplier */
   unsigned pre_shift;  /* shift for the dividend before multiplying */
   unsigned post_shift; /* shift for the dividend after multiplying */
   int increment;       /* 0 or 1; if set then increment the numerator, using one of
                           the two strategies */
};

struct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits);

/* si_state_binning.c */
void si_emit_dpbb_state(struct si_context *sctx);

/* si_state_shaders.c */
void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
                         unsigned char ir_sha1_cache_key[20]);
bool si_shader_cache_load_shader(struct si_screen *sscreen, unsigned char ir_sha1_cache_key[20],
                                 struct si_shader *shader);
void si_shader_cache_insert_shader(struct si_screen *sscreen, unsigned char ir_sha1_cache_key[20],
                                   struct si_shader *shader, bool insert_into_disk_cache);
bool si_shader_mem_ordered(struct si_shader *shader);
void si_init_screen_live_shader_cache(struct si_screen *sscreen);
void si_init_shader_functions(struct si_context *sctx);
56601e04c3fSmrgbool si_init_shader_cache(struct si_screen *sscreen); 56701e04c3fSmrgvoid si_destroy_shader_cache(struct si_screen *sscreen); 5687ec681f3Smrgvoid si_schedule_initial_compile(struct si_context *sctx, gl_shader_stage stage, 5697ec681f3Smrg struct util_queue_fence *ready_fence, 5707ec681f3Smrg struct si_compiler_ctx_state *compiler_ctx_state, void *job, 5717ec681f3Smrg util_queue_execute_func execute); 5727ec681f3Smrgvoid si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers, 5737ec681f3Smrg uint64_t *samplers_and_images); 5747ec681f3Smrgint si_shader_select_with_key(struct si_context *sctx, struct si_shader_ctx_state *state, 5757ec681f3Smrg const struct si_shader_key *key, int thread_index, 5767ec681f3Smrg bool optimized_or_none); 5777ec681f3Smrgint si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state); 5787ec681f3Smrgvoid si_vs_key_update_inputs(struct si_context *sctx); 5797ec681f3Smrgvoid si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key, 5807ec681f3Smrg struct si_vs_prolog_bits *prolog_key); 5817ec681f3Smrgvoid si_update_ps_inputs_read_or_disabled(struct si_context *sctx); 5827ec681f3Smrgvoid si_update_ps_kill_enable(struct si_context *sctx); 5837ec681f3Smrgvoid si_update_vrs_flat_shading(struct si_context *sctx); 5847ec681f3Smrgunsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key); 5857ec681f3Smrgbool si_update_ngg(struct si_context *sctx); 5867ec681f3Smrgvoid si_ps_key_update_framebuffer(struct si_context *sctx); 5877ec681f3Smrgvoid si_ps_key_update_framebuffer_blend(struct si_context *sctx); 5887ec681f3Smrgvoid si_ps_key_update_blend_rasterizer(struct si_context *sctx); 5897ec681f3Smrgvoid si_ps_key_update_rasterizer(struct si_context *sctx); 5907ec681f3Smrgvoid si_ps_key_update_dsa(struct si_context *sctx); 5917ec681f3Smrgvoid si_ps_key_update_sample_shading(struct si_context *sctx); 5927ec681f3Smrgvoid 
si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *sctx); 5937ec681f3Smrgvoid si_init_tess_factor_ring(struct si_context *sctx); 5947ec681f3Smrgbool si_update_gs_ring_buffers(struct si_context *sctx); 5957ec681f3Smrgbool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes); 5967ec681f3Smrg 5977ec681f3Smrg/* si_state_draw.cpp */ 5987ec681f3Smrgvoid si_set_vertex_buffer_descriptor(struct si_screen *sscreen, struct si_vertex_elements *velems, 5997ec681f3Smrg struct pipe_vertex_buffer *vb, unsigned element_index, 6007ec681f3Smrg uint32_t *out); 6017ec681f3Smrgvoid si_init_draw_functions_GFX6(struct si_context *sctx); 6027ec681f3Smrgvoid si_init_draw_functions_GFX7(struct si_context *sctx); 6037ec681f3Smrgvoid si_init_draw_functions_GFX8(struct si_context *sctx); 6047ec681f3Smrgvoid si_init_draw_functions_GFX9(struct si_context *sctx); 6057ec681f3Smrgvoid si_init_draw_functions_GFX10(struct si_context *sctx); 6067ec681f3Smrgvoid si_init_draw_functions_GFX10_3(struct si_context *sctx); 6077ec681f3Smrgvoid si_init_spi_map_functions(struct si_context *sctx); 60801e04c3fSmrg 60901e04c3fSmrg/* si_state_msaa.c */ 61001e04c3fSmrgvoid si_init_msaa_functions(struct si_context *sctx); 61101e04c3fSmrgvoid si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples); 61201e04c3fSmrg 61301e04c3fSmrg/* si_state_streamout.c */ 61401e04c3fSmrgvoid si_streamout_buffers_dirty(struct si_context *sctx); 61501e04c3fSmrgvoid si_emit_streamout_end(struct si_context *sctx); 6167ec681f3Smrgvoid si_update_prims_generated_query_state(struct si_context *sctx, unsigned type, int diff); 61701e04c3fSmrgvoid si_init_streamout_functions(struct si_context *sctx); 61801e04c3fSmrg 61901e04c3fSmrgstatic inline unsigned si_get_constbuf_slot(unsigned slot) 62001e04c3fSmrg{ 6217ec681f3Smrg /* Constant buffers are in slots [32..47], ascending */ 6227ec681f3Smrg return SI_NUM_SHADER_BUFFERS + slot; 62301e04c3fSmrg} 624af69d88dSmrg 62501e04c3fSmrgstatic inline unsigned 
si_get_shaderbuf_slot(unsigned slot) 62601e04c3fSmrg{ 6277ec681f3Smrg /* shader buffers are in slots [31..0], descending */ 6287ec681f3Smrg return SI_NUM_SHADER_BUFFERS - 1 - slot; 62901e04c3fSmrg} 63001e04c3fSmrg 63101e04c3fSmrgstatic inline unsigned si_get_sampler_slot(unsigned slot) 63201e04c3fSmrg{ 6337ec681f3Smrg /* 32 samplers are in sampler slots [16..47], 16 dw per slot, ascending */ 6347ec681f3Smrg /* those are equivalent to image slots [32..95], 8 dw per slot, ascending */ 6357ec681f3Smrg return SI_NUM_IMAGE_SLOTS / 2 + slot; 63601e04c3fSmrg} 63701e04c3fSmrg 63801e04c3fSmrgstatic inline unsigned si_get_image_slot(unsigned slot) 63901e04c3fSmrg{ 6407ec681f3Smrg /* image slots are in [31..0] (sampler slots [15..0]), descending */ 6417ec681f3Smrg /* images are in slots [31..16], while FMASKs are in slots [15..0] */ 6427ec681f3Smrg return SI_NUM_IMAGE_SLOTS - 1 - slot; 64301e04c3fSmrg} 644af69d88dSmrg 6457ec681f3Smrg#ifdef __cplusplus 6467ec681f3Smrg} 6477ec681f3Smrg#endif 6487ec681f3Smrg 649af69d88dSmrg#endif 650