/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
223464ebd5Sriastradh */ 23af69d88dSmrg#include "r600_formats.h" 243464ebd5Sriastradh#include "r600_shader.h" 2501e04c3fSmrg#include "r600_query.h" 26af69d88dSmrg#include "evergreend.h" 273464ebd5Sriastradh 28af69d88dSmrg#include "pipe/p_shader_tokens.h" 29af69d88dSmrg#include "util/u_pack_color.h" 30af69d88dSmrg#include "util/u_memory.h" 31af69d88dSmrg#include "util/u_framebuffer.h" 32af69d88dSmrg#include "util/u_dual_blend.h" 33af69d88dSmrg#include "evergreen_compute.h" 34af69d88dSmrg#include "util/u_math.h" 35af69d88dSmrg 3601e04c3fSmrgstatic inline unsigned evergreen_array_mode(unsigned mode) 373464ebd5Sriastradh{ 38af69d88dSmrg switch (mode) { 3901e04c3fSmrg default: 40af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED; 41af69d88dSmrg break; 42af69d88dSmrg case RADEON_SURF_MODE_1D: return V_028C70_ARRAY_1D_TILED_THIN1; 43af69d88dSmrg break; 44af69d88dSmrg case RADEON_SURF_MODE_2D: return V_028C70_ARRAY_2D_TILED_THIN1; 45af69d88dSmrg } 46af69d88dSmrg} 473464ebd5Sriastradh 48af69d88dSmrgstatic uint32_t eg_num_banks(uint32_t nbanks) 49af69d88dSmrg{ 50af69d88dSmrg switch (nbanks) { 51af69d88dSmrg case 2: 52af69d88dSmrg return 0; 53af69d88dSmrg case 4: 54af69d88dSmrg return 1; 55af69d88dSmrg case 8: 56af69d88dSmrg default: 57af69d88dSmrg return 2; 58af69d88dSmrg case 16: 59af69d88dSmrg return 3; 60af69d88dSmrg } 61af69d88dSmrg} 623464ebd5Sriastradh 633464ebd5Sriastradh 64af69d88dSmrgstatic unsigned eg_tile_split(unsigned tile_split) 65af69d88dSmrg{ 66af69d88dSmrg switch (tile_split) { 67af69d88dSmrg case 64: tile_split = 0; break; 68af69d88dSmrg case 128: tile_split = 1; break; 69af69d88dSmrg case 256: tile_split = 2; break; 70af69d88dSmrg case 512: tile_split = 3; break; 71af69d88dSmrg default: 72af69d88dSmrg case 1024: tile_split = 4; break; 73af69d88dSmrg case 2048: tile_split = 5; break; 74af69d88dSmrg case 4096: tile_split = 6; break; 75af69d88dSmrg } 76af69d88dSmrg return tile_split; 773464ebd5Sriastradh} 
783464ebd5Sriastradh 79af69d88dSmrgstatic unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect) 803464ebd5Sriastradh{ 81af69d88dSmrg switch (macro_tile_aspect) { 82af69d88dSmrg default: 83af69d88dSmrg case 1: macro_tile_aspect = 0; break; 84af69d88dSmrg case 2: macro_tile_aspect = 1; break; 85af69d88dSmrg case 4: macro_tile_aspect = 2; break; 86af69d88dSmrg case 8: macro_tile_aspect = 3; break; 87af69d88dSmrg } 88af69d88dSmrg return macro_tile_aspect; 89af69d88dSmrg} 903464ebd5Sriastradh 91af69d88dSmrgstatic unsigned eg_bank_wh(unsigned bankwh) 92af69d88dSmrg{ 93af69d88dSmrg switch (bankwh) { 94af69d88dSmrg default: 95af69d88dSmrg case 1: bankwh = 0; break; 96af69d88dSmrg case 2: bankwh = 1; break; 97af69d88dSmrg case 4: bankwh = 2; break; 98af69d88dSmrg case 8: bankwh = 3; break; 993464ebd5Sriastradh } 100af69d88dSmrg return bankwh; 101af69d88dSmrg} 1023464ebd5Sriastradh 103af69d88dSmrgstatic uint32_t r600_translate_blend_function(int blend_func) 104af69d88dSmrg{ 105af69d88dSmrg switch (blend_func) { 106af69d88dSmrg case PIPE_BLEND_ADD: 107af69d88dSmrg return V_028780_COMB_DST_PLUS_SRC; 108af69d88dSmrg case PIPE_BLEND_SUBTRACT: 109af69d88dSmrg return V_028780_COMB_SRC_MINUS_DST; 110af69d88dSmrg case PIPE_BLEND_REVERSE_SUBTRACT: 111af69d88dSmrg return V_028780_COMB_DST_MINUS_SRC; 112af69d88dSmrg case PIPE_BLEND_MIN: 113af69d88dSmrg return V_028780_COMB_MIN_DST_SRC; 114af69d88dSmrg case PIPE_BLEND_MAX: 115af69d88dSmrg return V_028780_COMB_MAX_DST_SRC; 116af69d88dSmrg default: 117af69d88dSmrg R600_ERR("Unknown blend function %d\n", blend_func); 118af69d88dSmrg assert(0); 119af69d88dSmrg break; 120af69d88dSmrg } 121af69d88dSmrg return 0; 122af69d88dSmrg} 123af69d88dSmrg 124af69d88dSmrgstatic uint32_t r600_translate_blend_factor(int blend_fact) 125af69d88dSmrg{ 126af69d88dSmrg switch (blend_fact) { 127af69d88dSmrg case PIPE_BLENDFACTOR_ONE: 128af69d88dSmrg return V_028780_BLEND_ONE; 129af69d88dSmrg case PIPE_BLENDFACTOR_SRC_COLOR: 130af69d88dSmrg return 
V_028780_BLEND_SRC_COLOR; 131af69d88dSmrg case PIPE_BLENDFACTOR_SRC_ALPHA: 132af69d88dSmrg return V_028780_BLEND_SRC_ALPHA; 133af69d88dSmrg case PIPE_BLENDFACTOR_DST_ALPHA: 134af69d88dSmrg return V_028780_BLEND_DST_ALPHA; 135af69d88dSmrg case PIPE_BLENDFACTOR_DST_COLOR: 136af69d88dSmrg return V_028780_BLEND_DST_COLOR; 137af69d88dSmrg case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 138af69d88dSmrg return V_028780_BLEND_SRC_ALPHA_SATURATE; 139af69d88dSmrg case PIPE_BLENDFACTOR_CONST_COLOR: 140af69d88dSmrg return V_028780_BLEND_CONST_COLOR; 141af69d88dSmrg case PIPE_BLENDFACTOR_CONST_ALPHA: 142af69d88dSmrg return V_028780_BLEND_CONST_ALPHA; 143af69d88dSmrg case PIPE_BLENDFACTOR_ZERO: 144af69d88dSmrg return V_028780_BLEND_ZERO; 145af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC_COLOR: 146af69d88dSmrg return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 147af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 148af69d88dSmrg return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 149af69d88dSmrg case PIPE_BLENDFACTOR_INV_DST_ALPHA: 150af69d88dSmrg return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 151af69d88dSmrg case PIPE_BLENDFACTOR_INV_DST_COLOR: 152af69d88dSmrg return V_028780_BLEND_ONE_MINUS_DST_COLOR; 153af69d88dSmrg case PIPE_BLENDFACTOR_INV_CONST_COLOR: 154af69d88dSmrg return V_028780_BLEND_ONE_MINUS_CONST_COLOR; 155af69d88dSmrg case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 156af69d88dSmrg return V_028780_BLEND_ONE_MINUS_CONST_ALPHA; 157af69d88dSmrg case PIPE_BLENDFACTOR_SRC1_COLOR: 158af69d88dSmrg return V_028780_BLEND_SRC1_COLOR; 159af69d88dSmrg case PIPE_BLENDFACTOR_SRC1_ALPHA: 160af69d88dSmrg return V_028780_BLEND_SRC1_ALPHA; 161af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 162af69d88dSmrg return V_028780_BLEND_INV_SRC1_COLOR; 163af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 164af69d88dSmrg return V_028780_BLEND_INV_SRC1_ALPHA; 165af69d88dSmrg default: 166af69d88dSmrg R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 167af69d88dSmrg assert(0); 168af69d88dSmrg break; 169af69d88dSmrg } 
170af69d88dSmrg return 0; 171af69d88dSmrg} 172af69d88dSmrg 17301e04c3fSmrgstatic unsigned r600_tex_dim(struct r600_texture *rtex, 17401e04c3fSmrg unsigned view_target, unsigned nr_samples) 175af69d88dSmrg{ 17601e04c3fSmrg unsigned res_target = rtex->resource.b.b.target; 17701e04c3fSmrg 17801e04c3fSmrg if (view_target == PIPE_TEXTURE_CUBE || 17901e04c3fSmrg view_target == PIPE_TEXTURE_CUBE_ARRAY) 18001e04c3fSmrg res_target = view_target; 18101e04c3fSmrg /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 18201e04c3fSmrg else if (res_target == PIPE_TEXTURE_CUBE || 18301e04c3fSmrg res_target == PIPE_TEXTURE_CUBE_ARRAY) 18401e04c3fSmrg res_target = PIPE_TEXTURE_2D_ARRAY; 18501e04c3fSmrg 18601e04c3fSmrg switch (res_target) { 187af69d88dSmrg default: 188af69d88dSmrg case PIPE_TEXTURE_1D: 189af69d88dSmrg return V_030000_SQ_TEX_DIM_1D; 190af69d88dSmrg case PIPE_TEXTURE_1D_ARRAY: 191af69d88dSmrg return V_030000_SQ_TEX_DIM_1D_ARRAY; 192af69d88dSmrg case PIPE_TEXTURE_2D: 193af69d88dSmrg case PIPE_TEXTURE_RECT: 194af69d88dSmrg return nr_samples > 1 ? V_030000_SQ_TEX_DIM_2D_MSAA : 195af69d88dSmrg V_030000_SQ_TEX_DIM_2D; 196af69d88dSmrg case PIPE_TEXTURE_2D_ARRAY: 197af69d88dSmrg return nr_samples > 1 ? 
V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA : 198af69d88dSmrg V_030000_SQ_TEX_DIM_2D_ARRAY; 199af69d88dSmrg case PIPE_TEXTURE_3D: 200af69d88dSmrg return V_030000_SQ_TEX_DIM_3D; 201af69d88dSmrg case PIPE_TEXTURE_CUBE: 202af69d88dSmrg case PIPE_TEXTURE_CUBE_ARRAY: 203af69d88dSmrg return V_030000_SQ_TEX_DIM_CUBEMAP; 204af69d88dSmrg } 205af69d88dSmrg} 206af69d88dSmrg 207af69d88dSmrgstatic uint32_t r600_translate_dbformat(enum pipe_format format) 208af69d88dSmrg{ 209af69d88dSmrg switch (format) { 210af69d88dSmrg case PIPE_FORMAT_Z16_UNORM: 211af69d88dSmrg return V_028040_Z_16; 212af69d88dSmrg case PIPE_FORMAT_Z24X8_UNORM: 213af69d88dSmrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 214af69d88dSmrg case PIPE_FORMAT_X8Z24_UNORM: 215af69d88dSmrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 216af69d88dSmrg return V_028040_Z_24; 217af69d88dSmrg case PIPE_FORMAT_Z32_FLOAT: 218af69d88dSmrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 219af69d88dSmrg return V_028040_Z_32_FLOAT; 220af69d88dSmrg default: 221af69d88dSmrg return ~0U; 222af69d88dSmrg } 223af69d88dSmrg} 224af69d88dSmrg 225af69d88dSmrgstatic bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 226af69d88dSmrg{ 22701e04c3fSmrg return r600_translate_texformat(screen, format, NULL, NULL, NULL, 22801e04c3fSmrg FALSE) != ~0U; 229af69d88dSmrg} 2303464ebd5Sriastradh 231af69d88dSmrgstatic bool r600_is_colorbuffer_format_supported(enum chip_class chip, enum pipe_format format) 232af69d88dSmrg{ 23301e04c3fSmrg return r600_translate_colorformat(chip, format, FALSE) != ~0U && 23401e04c3fSmrg r600_translate_colorswap(format, FALSE) != ~0U; 235af69d88dSmrg} 236af69d88dSmrg 237af69d88dSmrgstatic bool r600_is_zs_format_supported(enum pipe_format format) 238af69d88dSmrg{ 239af69d88dSmrg return r600_translate_dbformat(format) != ~0U; 240af69d88dSmrg} 241af69d88dSmrg 2427ec681f3Smrgbool evergreen_is_format_supported(struct pipe_screen *screen, 2437ec681f3Smrg enum pipe_format format, 2447ec681f3Smrg enum pipe_texture_target target, 
2457ec681f3Smrg unsigned sample_count, 2467ec681f3Smrg unsigned storage_sample_count, 2477ec681f3Smrg unsigned usage) 248af69d88dSmrg{ 249af69d88dSmrg struct r600_screen *rscreen = (struct r600_screen*)screen; 250af69d88dSmrg unsigned retval = 0; 251af69d88dSmrg 252af69d88dSmrg if (target >= PIPE_MAX_TEXTURE_TYPES) { 253af69d88dSmrg R600_ERR("r600: unsupported texture type %d\n", target); 2547ec681f3Smrg return false; 255af69d88dSmrg } 256af69d88dSmrg 25701e04c3fSmrg if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) 25801e04c3fSmrg return false; 259af69d88dSmrg 260af69d88dSmrg if (sample_count > 1) { 261af69d88dSmrg if (!rscreen->has_msaa) 2627ec681f3Smrg return false; 263af69d88dSmrg 264af69d88dSmrg switch (sample_count) { 265af69d88dSmrg case 2: 266af69d88dSmrg case 4: 267af69d88dSmrg case 8: 268af69d88dSmrg break; 269af69d88dSmrg default: 2707ec681f3Smrg return false; 271af69d88dSmrg } 272af69d88dSmrg } 273af69d88dSmrg 274af69d88dSmrg if (usage & PIPE_BIND_SAMPLER_VIEW) { 275af69d88dSmrg if (target == PIPE_BUFFER) { 276af69d88dSmrg if (r600_is_vertex_format_supported(format)) 277af69d88dSmrg retval |= PIPE_BIND_SAMPLER_VIEW; 278af69d88dSmrg } else { 279af69d88dSmrg if (r600_is_sampler_format_supported(screen, format)) 280af69d88dSmrg retval |= PIPE_BIND_SAMPLER_VIEW; 281af69d88dSmrg } 282af69d88dSmrg } 283af69d88dSmrg 284af69d88dSmrg if ((usage & (PIPE_BIND_RENDER_TARGET | 285af69d88dSmrg PIPE_BIND_DISPLAY_TARGET | 286af69d88dSmrg PIPE_BIND_SCANOUT | 28701e04c3fSmrg PIPE_BIND_SHARED | 28801e04c3fSmrg PIPE_BIND_BLENDABLE)) && 289af69d88dSmrg r600_is_colorbuffer_format_supported(rscreen->b.chip_class, format)) { 290af69d88dSmrg retval |= usage & 291af69d88dSmrg (PIPE_BIND_RENDER_TARGET | 292af69d88dSmrg PIPE_BIND_DISPLAY_TARGET | 293af69d88dSmrg PIPE_BIND_SCANOUT | 294af69d88dSmrg PIPE_BIND_SHARED); 29501e04c3fSmrg if (!util_format_is_pure_integer(format) && 29601e04c3fSmrg !util_format_is_depth_or_stencil(format)) 29701e04c3fSmrg retval |= usage & 
PIPE_BIND_BLENDABLE; 298af69d88dSmrg } 299af69d88dSmrg 300af69d88dSmrg if ((usage & PIPE_BIND_DEPTH_STENCIL) && 301af69d88dSmrg r600_is_zs_format_supported(format)) { 302af69d88dSmrg retval |= PIPE_BIND_DEPTH_STENCIL; 303af69d88dSmrg } 304af69d88dSmrg 305af69d88dSmrg if ((usage & PIPE_BIND_VERTEX_BUFFER) && 306af69d88dSmrg r600_is_vertex_format_supported(format)) { 307af69d88dSmrg retval |= PIPE_BIND_VERTEX_BUFFER; 308af69d88dSmrg } 309af69d88dSmrg 3107ec681f3Smrg if (usage & PIPE_BIND_INDEX_BUFFER && 3117ec681f3Smrg r600_is_index_format_supported(format)) { 3127ec681f3Smrg retval |= PIPE_BIND_INDEX_BUFFER; 3137ec681f3Smrg } 3147ec681f3Smrg 31501e04c3fSmrg if ((usage & PIPE_BIND_LINEAR) && 31601e04c3fSmrg !util_format_is_compressed(format) && 31701e04c3fSmrg !(usage & PIPE_BIND_DEPTH_STENCIL)) 31801e04c3fSmrg retval |= PIPE_BIND_LINEAR; 319af69d88dSmrg 320af69d88dSmrg return retval == usage; 321af69d88dSmrg} 322af69d88dSmrg 323af69d88dSmrgstatic void *evergreen_create_blend_state_mode(struct pipe_context *ctx, 324af69d88dSmrg const struct pipe_blend_state *state, int mode) 325af69d88dSmrg{ 326af69d88dSmrg uint32_t color_control = 0, target_mask = 0; 327af69d88dSmrg struct r600_blend_state *blend = CALLOC_STRUCT(r600_blend_state); 328af69d88dSmrg 329af69d88dSmrg if (!blend) { 330af69d88dSmrg return NULL; 331af69d88dSmrg } 332af69d88dSmrg 333af69d88dSmrg r600_init_command_buffer(&blend->buffer, 20); 334af69d88dSmrg r600_init_command_buffer(&blend->buffer_no_blend, 20); 3353464ebd5Sriastradh 3363464ebd5Sriastradh if (state->logicop_enable) { 3373464ebd5Sriastradh color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); 3383464ebd5Sriastradh } else { 3393464ebd5Sriastradh color_control |= (0xcc << 16); 3403464ebd5Sriastradh } 3413464ebd5Sriastradh /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 3423464ebd5Sriastradh if (state->independent_blend_enable) { 3433464ebd5Sriastradh for (int i = 0; i < 8; i++) { 
3443464ebd5Sriastradh target_mask |= (state->rt[i].colormask << (4 * i)); 3453464ebd5Sriastradh } 3463464ebd5Sriastradh } else { 3473464ebd5Sriastradh for (int i = 0; i < 8; i++) { 3483464ebd5Sriastradh target_mask |= (state->rt[0].colormask << (4 * i)); 3493464ebd5Sriastradh } 3503464ebd5Sriastradh } 351af69d88dSmrg 352af69d88dSmrg /* only have dual source on MRT0 */ 353af69d88dSmrg blend->dual_src_blend = util_blend_state_is_dual(state, 0); 3543464ebd5Sriastradh blend->cb_target_mask = target_mask; 355af69d88dSmrg blend->alpha_to_one = state->alpha_to_one; 3563464ebd5Sriastradh 357af69d88dSmrg if (target_mask) 358af69d88dSmrg color_control |= S_028808_MODE(mode); 359af69d88dSmrg else 360af69d88dSmrg color_control |= S_028808_MODE(V_028808_CB_DISABLE); 361af69d88dSmrg 362af69d88dSmrg 363af69d88dSmrg r600_store_context_reg(&blend->buffer, R_028808_CB_COLOR_CONTROL, color_control); 364af69d88dSmrg r600_store_context_reg(&blend->buffer, R_028B70_DB_ALPHA_TO_MASK, 365af69d88dSmrg S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 366af69d88dSmrg S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 367af69d88dSmrg S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 368af69d88dSmrg S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 369af69d88dSmrg S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 370af69d88dSmrg r600_store_context_reg_seq(&blend->buffer, R_028780_CB_BLEND0_CONTROL, 8); 371af69d88dSmrg 372af69d88dSmrg /* Copy over the dwords set so far into buffer_no_blend. 373af69d88dSmrg * Only the CB_BLENDi_CONTROL registers must be set after this. */ 374af69d88dSmrg memcpy(blend->buffer_no_blend.buf, blend->buffer.buf, blend->buffer.num_dw * 4); 375af69d88dSmrg blend->buffer_no_blend.num_dw = blend->buffer.num_dw; 3763464ebd5Sriastradh 3773464ebd5Sriastradh for (int i = 0; i < 8; i++) { 3783464ebd5Sriastradh /* state->rt entries > 0 only written if independent blending */ 3793464ebd5Sriastradh const int j = state->independent_blend_enable ? 
i : 0; 3803464ebd5Sriastradh 3813464ebd5Sriastradh unsigned eqRGB = state->rt[j].rgb_func; 3823464ebd5Sriastradh unsigned srcRGB = state->rt[j].rgb_src_factor; 3833464ebd5Sriastradh unsigned dstRGB = state->rt[j].rgb_dst_factor; 3843464ebd5Sriastradh unsigned eqA = state->rt[j].alpha_func; 3853464ebd5Sriastradh unsigned srcA = state->rt[j].alpha_src_factor; 3863464ebd5Sriastradh unsigned dstA = state->rt[j].alpha_dst_factor; 387af69d88dSmrg uint32_t bc = 0; 388af69d88dSmrg 389af69d88dSmrg r600_store_value(&blend->buffer_no_blend, 0); 3903464ebd5Sriastradh 391af69d88dSmrg if (!state->rt[j].blend_enable) { 392af69d88dSmrg r600_store_value(&blend->buffer, 0); 3933464ebd5Sriastradh continue; 394af69d88dSmrg } 3953464ebd5Sriastradh 396af69d88dSmrg bc |= S_028780_BLEND_CONTROL_ENABLE(1); 397af69d88dSmrg bc |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); 398af69d88dSmrg bc |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); 399af69d88dSmrg bc |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); 4003464ebd5Sriastradh 4013464ebd5Sriastradh if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 402af69d88dSmrg bc |= S_028780_SEPARATE_ALPHA_BLEND(1); 403af69d88dSmrg bc |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); 404af69d88dSmrg bc |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); 405af69d88dSmrg bc |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); 4063464ebd5Sriastradh } 407af69d88dSmrg r600_store_value(&blend->buffer, bc); 4083464ebd5Sriastradh } 409af69d88dSmrg return blend; 410af69d88dSmrg} 4113464ebd5Sriastradh 412af69d88dSmrgstatic void *evergreen_create_blend_state(struct pipe_context *ctx, 413af69d88dSmrg const struct pipe_blend_state *state) 414af69d88dSmrg{ 415af69d88dSmrg 416af69d88dSmrg return evergreen_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 4173464ebd5Sriastradh} 4183464ebd5Sriastradh 4193464ebd5Sriastradhstatic void *evergreen_create_dsa_state(struct 
pipe_context *ctx, 4203464ebd5Sriastradh const struct pipe_depth_stencil_alpha_state *state) 4213464ebd5Sriastradh{ 422af69d88dSmrg unsigned db_depth_control, alpha_test_control, alpha_ref; 423af69d88dSmrg struct r600_dsa_state *dsa = CALLOC_STRUCT(r600_dsa_state); 4243464ebd5Sriastradh 42501e04c3fSmrg if (!dsa) { 4263464ebd5Sriastradh return NULL; 4273464ebd5Sriastradh } 4283464ebd5Sriastradh 429af69d88dSmrg r600_init_command_buffer(&dsa->buffer, 3); 430af69d88dSmrg 431af69d88dSmrg dsa->valuemask[0] = state->stencil[0].valuemask; 432af69d88dSmrg dsa->valuemask[1] = state->stencil[1].valuemask; 433af69d88dSmrg dsa->writemask[0] = state->stencil[0].writemask; 434af69d88dSmrg dsa->writemask[1] = state->stencil[1].writemask; 4357ec681f3Smrg dsa->zwritemask = state->depth_writemask; 4363464ebd5Sriastradh 4377ec681f3Smrg db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) | 4387ec681f3Smrg S_028800_Z_WRITE_ENABLE(state->depth_writemask) | 4397ec681f3Smrg S_028800_ZFUNC(state->depth_func); 4403464ebd5Sriastradh 4413464ebd5Sriastradh /* stencil */ 4423464ebd5Sriastradh if (state->stencil[0].enabled) { 4433464ebd5Sriastradh db_depth_control |= S_028800_STENCIL_ENABLE(1); 444af69d88dSmrg db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); /* translates straight */ 4453464ebd5Sriastradh db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); 4463464ebd5Sriastradh db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); 4473464ebd5Sriastradh db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); 4483464ebd5Sriastradh 4493464ebd5Sriastradh if (state->stencil[1].enabled) { 4503464ebd5Sriastradh db_depth_control |= S_028800_BACKFACE_ENABLE(1); 451af69d88dSmrg db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); /* translates straight */ 4523464ebd5Sriastradh db_depth_control |= 
S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); 4533464ebd5Sriastradh db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); 4543464ebd5Sriastradh db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); 4553464ebd5Sriastradh } 4563464ebd5Sriastradh } 4573464ebd5Sriastradh 4583464ebd5Sriastradh /* alpha */ 4593464ebd5Sriastradh alpha_test_control = 0; 4603464ebd5Sriastradh alpha_ref = 0; 4617ec681f3Smrg if (state->alpha_enabled) { 4627ec681f3Smrg alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func); 4633464ebd5Sriastradh alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); 4647ec681f3Smrg alpha_ref = fui(state->alpha_ref_value); 4653464ebd5Sriastradh } 466af69d88dSmrg dsa->sx_alpha_test_control = alpha_test_control & 0xff; 4673464ebd5Sriastradh dsa->alpha_ref = alpha_ref; 4683464ebd5Sriastradh 4693464ebd5Sriastradh /* misc */ 470af69d88dSmrg r600_store_context_reg(&dsa->buffer, R_028800_DB_DEPTH_CONTROL, db_depth_control); 471af69d88dSmrg return dsa; 4723464ebd5Sriastradh} 4733464ebd5Sriastradh 4743464ebd5Sriastradhstatic void *evergreen_create_rs_state(struct pipe_context *ctx, 4753464ebd5Sriastradh const struct pipe_rasterizer_state *state) 4763464ebd5Sriastradh{ 477af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 478af69d88dSmrg unsigned tmp, spi_interp; 479af69d88dSmrg float psize_min, psize_max; 480af69d88dSmrg struct r600_rasterizer_state *rs = CALLOC_STRUCT(r600_rasterizer_state); 4813464ebd5Sriastradh 48201e04c3fSmrg if (!rs) { 4833464ebd5Sriastradh return NULL; 4843464ebd5Sriastradh } 4853464ebd5Sriastradh 486af69d88dSmrg r600_init_command_buffer(&rs->buffer, 30); 487af69d88dSmrg 48801e04c3fSmrg rs->scissor_enable = state->scissor; 48901e04c3fSmrg rs->clip_halfz = state->clip_halfz; 4903464ebd5Sriastradh rs->flatshade = state->flatshade; 4913464ebd5Sriastradh rs->sprite_coord_enable = 
state->sprite_coord_enable; 49201e04c3fSmrg rs->rasterizer_discard = state->rasterizer_discard; 493af69d88dSmrg rs->two_side = state->light_twoside; 494af69d88dSmrg rs->clip_plane_enable = state->clip_plane_enable; 495af69d88dSmrg rs->pa_sc_line_stipple = state->line_stipple_enable ? 496af69d88dSmrg S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 497af69d88dSmrg S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 498af69d88dSmrg rs->pa_cl_clip_cntl = 49901e04c3fSmrg S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 50001e04c3fSmrg S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | 50101e04c3fSmrg S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | 502af69d88dSmrg S_028810_DX_LINEAR_ATTR_CLIP_ENA(1) | 503af69d88dSmrg S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard); 504af69d88dSmrg rs->multisample_enable = state->multisample; 5053464ebd5Sriastradh 5063464ebd5Sriastradh /* offset */ 5073464ebd5Sriastradh rs->offset_units = state->offset_units; 50801e04c3fSmrg rs->offset_scale = state->offset_scale * 16.0f; 509af69d88dSmrg rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri; 51001e04c3fSmrg rs->offset_units_unscaled = state->offset_units_unscaled; 5113464ebd5Sriastradh 512af69d88dSmrg if (state->point_size_per_vertex) { 513af69d88dSmrg psize_min = util_get_min_point_size(state); 514af69d88dSmrg psize_max = 8192; 515af69d88dSmrg } else { 516af69d88dSmrg /* Force the point size to be as if the vertex output was disabled. 
*/ 517af69d88dSmrg psize_min = state->point_size; 518af69d88dSmrg psize_max = state->point_size; 519af69d88dSmrg } 520af69d88dSmrg 521af69d88dSmrg spi_interp = S_0286D4_FLAT_SHADE_ENA(1); 5227ec681f3Smrg spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) | 5237ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_X(2) | 5247ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_Y(3) | 5257ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_Z(0) | 5267ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_W(1); 5277ec681f3Smrg if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { 5287ec681f3Smrg spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1); 5293464ebd5Sriastradh } 5303464ebd5Sriastradh 531af69d88dSmrg r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3); 532af69d88dSmrg /* point size 12.4 fixed point (divide by two, because 0.5 = 1 pixel) */ 533af69d88dSmrg tmp = r600_pack_float_12p4(state->point_size/2); 534af69d88dSmrg r600_store_value(&rs->buffer, /* R_028A00_PA_SU_POINT_SIZE */ 535af69d88dSmrg S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 536af69d88dSmrg r600_store_value(&rs->buffer, /* R_028A04_PA_SU_POINT_MINMAX */ 537af69d88dSmrg S_028A04_MIN_SIZE(r600_pack_float_12p4(psize_min/2)) | 538af69d88dSmrg S_028A04_MAX_SIZE(r600_pack_float_12p4(psize_max/2))); 539af69d88dSmrg r600_store_value(&rs->buffer, /* R_028A08_PA_SU_LINE_CNTL */ 540af69d88dSmrg S_028A08_WIDTH((unsigned)(state->line_width * 8))); 541af69d88dSmrg 542af69d88dSmrg r600_store_context_reg(&rs->buffer, R_0286D4_SPI_INTERP_CONTROL_0, spi_interp); 543af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028A48_PA_SC_MODE_CNTL_0, 544af69d88dSmrg S_028A48_MSAA_ENABLE(state->multisample) | 54501e04c3fSmrg S_028A48_VPORT_SCISSOR_ENABLE(1) | 546af69d88dSmrg S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable)); 547af69d88dSmrg 548af69d88dSmrg if (rctx->b.chip_class == CAYMAN) { 549af69d88dSmrg r600_store_context_reg(&rs->buffer, CM_R_028BE4_PA_SU_VTX_CNTL, 550af69d88dSmrg S_028C08_PIX_CENTER_HALF(state->half_pixel_center) | 551af69d88dSmrg 
S_028C08_QUANT_MODE(V_028C08_X_1_256TH)); 5523464ebd5Sriastradh } else { 553af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028C08_PA_SU_VTX_CNTL, 554af69d88dSmrg S_028C08_PIX_CENTER_HALF(state->half_pixel_center) | 555af69d88dSmrg S_028C08_QUANT_MODE(V_028C08_X_1_256TH)); 5563464ebd5Sriastradh } 557af69d88dSmrg 558af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 559af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028814_PA_SU_SC_MODE_CNTL, 560af69d88dSmrg S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 561af69d88dSmrg S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 562af69d88dSmrg S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 563af69d88dSmrg S_028814_FACE(!state->front_ccw) | 564af69d88dSmrg S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 565af69d88dSmrg S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 566af69d88dSmrg S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 567af69d88dSmrg S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 568af69d88dSmrg state->fill_back != PIPE_POLYGON_MODE_FILL) | 569af69d88dSmrg S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | 570af69d88dSmrg S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back))); 571af69d88dSmrg return rs; 5723464ebd5Sriastradh} 5733464ebd5Sriastradh 5743464ebd5Sriastradhstatic void *evergreen_create_sampler_state(struct pipe_context *ctx, 5753464ebd5Sriastradh const struct pipe_sampler_state *state) 5763464ebd5Sriastradh{ 57701e04c3fSmrg struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; 578af69d88dSmrg struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state); 57901e04c3fSmrg unsigned max_aniso = rscreen->force_aniso >= 0 ? 
rscreen->force_aniso 58001e04c3fSmrg : state->max_anisotropy; 58101e04c3fSmrg unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 5827ec681f3Smrg bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST && 5837ec681f3Smrg state->mag_img_filter == PIPE_TEX_FILTER_NEAREST; 58401e04c3fSmrg float max_lod = state->max_lod; 5853464ebd5Sriastradh 58601e04c3fSmrg if (!ss) { 5873464ebd5Sriastradh return NULL; 5883464ebd5Sriastradh } 5893464ebd5Sriastradh 59001e04c3fSmrg /* If the min_mip_filter is NONE, then the texture has no mipmapping and 59101e04c3fSmrg * MIP_FILTER will also be set to NONE. However, if more then one LOD is 59201e04c3fSmrg * configured, then the texture lookup seems to fail for some specific texture 59301e04c3fSmrg * formats. Forcing the number of LODs to one in this case fixes it. */ 59401e04c3fSmrg if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) 59501e04c3fSmrg max_lod = state->min_lod; 59601e04c3fSmrg 597af69d88dSmrg ss->border_color_use = sampler_state_needs_border_color(state); 598af69d88dSmrg 599af69d88dSmrg /* R_03C000_SQ_TEX_SAMPLER_WORD0_0 */ 600af69d88dSmrg ss->tex_sampler_words[0] = 601af69d88dSmrg S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | 602af69d88dSmrg S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | 603af69d88dSmrg S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | 60401e04c3fSmrg S_03C000_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) | 60501e04c3fSmrg S_03C000_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) | 606af69d88dSmrg S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | 60701e04c3fSmrg S_03C000_MAX_ANISO_RATIO(max_aniso_ratio) | 608af69d88dSmrg S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | 609af69d88dSmrg S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? 
V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); 610af69d88dSmrg /* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */ 611af69d88dSmrg ss->tex_sampler_words[1] = 612af69d88dSmrg S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 61301e04c3fSmrg S_03C004_MAX_LOD(S_FIXED(CLAMP(max_lod, 0, 15), 8)); 614af69d88dSmrg /* R_03C008_SQ_TEX_SAMPLER_WORD2_0 */ 615af69d88dSmrg ss->tex_sampler_words[2] = 616af69d88dSmrg S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 617af69d88dSmrg (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) | 6187ec681f3Smrg S_03C008_TRUNCATE_COORD(trunc_coord) | 619af69d88dSmrg S_03C008_TYPE(1); 620af69d88dSmrg 621af69d88dSmrg if (ss->border_color_use) { 622af69d88dSmrg memcpy(&ss->border_color, &state->border_color, sizeof(state->border_color)); 623af69d88dSmrg } 624af69d88dSmrg return ss; 625af69d88dSmrg} 626af69d88dSmrg 62701e04c3fSmrgstruct eg_buf_res_params { 62801e04c3fSmrg enum pipe_format pipe_format; 62901e04c3fSmrg unsigned offset; 63001e04c3fSmrg unsigned size; 63101e04c3fSmrg unsigned char swizzle[4]; 63201e04c3fSmrg bool uncached; 63301e04c3fSmrg bool force_swizzle; 63401e04c3fSmrg bool size_in_bytes; 63501e04c3fSmrg}; 63601e04c3fSmrg 63701e04c3fSmrgstatic void evergreen_fill_buffer_resource_words(struct r600_context *rctx, 63801e04c3fSmrg struct pipe_resource *buffer, 63901e04c3fSmrg struct eg_buf_res_params *params, 64001e04c3fSmrg bool *skip_mip_address_reloc, 64101e04c3fSmrg unsigned tex_resource_words[8]) 642af69d88dSmrg{ 64301e04c3fSmrg struct r600_texture *tmp = (struct r600_texture*)buffer; 644af69d88dSmrg uint64_t va; 64501e04c3fSmrg int stride = util_format_get_blocksize(params->pipe_format); 646af69d88dSmrg unsigned format, num_format, format_comp, endian; 647af69d88dSmrg unsigned swizzle_res; 6483464ebd5Sriastradh const struct util_format_description *desc; 649af69d88dSmrg 65001e04c3fSmrg r600_vertex_data_type(params->pipe_format, 651af69d88dSmrg &format, &num_format, &format_comp, 652af69d88dSmrg &endian); 
653af69d88dSmrg 65401e04c3fSmrg desc = util_format_description(params->pipe_format); 655af69d88dSmrg 65601e04c3fSmrg if (params->force_swizzle) 65701e04c3fSmrg swizzle_res = r600_get_swizzle_combined(params->swizzle, NULL, TRUE); 65801e04c3fSmrg else 65901e04c3fSmrg swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE); 660af69d88dSmrg 66101e04c3fSmrg va = tmp->resource.gpu_address + params->offset; 66201e04c3fSmrg *skip_mip_address_reloc = true; 66301e04c3fSmrg tex_resource_words[0] = va; 66401e04c3fSmrg tex_resource_words[1] = params->size - 1; 66501e04c3fSmrg tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) | 666af69d88dSmrg S_030008_STRIDE(stride) | 667af69d88dSmrg S_030008_DATA_FORMAT(format) | 668af69d88dSmrg S_030008_NUM_FORMAT_ALL(num_format) | 669af69d88dSmrg S_030008_FORMAT_COMP_ALL(format_comp) | 670af69d88dSmrg S_030008_ENDIAN_SWAP(endian); 67101e04c3fSmrg tex_resource_words[3] = swizzle_res | S_03000C_UNCACHED(params->uncached); 672af69d88dSmrg /* 67301e04c3fSmrg * dword 4 is for number of elements, for use with resinfo, 67401e04c3fSmrg * albeit the amd gpu shader analyser 675af69d88dSmrg * uses a const buffer to store the element sizes for buffer txq 676af69d88dSmrg */ 67701e04c3fSmrg tex_resource_words[4] = params->size_in_bytes ? 
params->size : (params->size / stride); 67801e04c3fSmrg 67901e04c3fSmrg tex_resource_words[5] = tex_resource_words[6] = 0; 68001e04c3fSmrg tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER); 68101e04c3fSmrg} 68201e04c3fSmrg 68301e04c3fSmrgstatic struct pipe_sampler_view * 68401e04c3fSmrgtexture_buffer_sampler_view(struct r600_context *rctx, 68501e04c3fSmrg struct r600_pipe_sampler_view *view, 68601e04c3fSmrg unsigned width0, unsigned height0) 68701e04c3fSmrg{ 68801e04c3fSmrg struct r600_texture *tmp = (struct r600_texture*)view->base.texture; 68901e04c3fSmrg struct eg_buf_res_params params; 69001e04c3fSmrg 69101e04c3fSmrg memset(¶ms, 0, sizeof(params)); 69201e04c3fSmrg 69301e04c3fSmrg params.pipe_format = view->base.format; 69401e04c3fSmrg params.offset = view->base.u.buf.offset; 69501e04c3fSmrg params.size = view->base.u.buf.size; 69601e04c3fSmrg params.swizzle[0] = view->base.swizzle_r; 69701e04c3fSmrg params.swizzle[1] = view->base.swizzle_g; 69801e04c3fSmrg params.swizzle[2] = view->base.swizzle_b; 69901e04c3fSmrg params.swizzle[3] = view->base.swizzle_a; 70001e04c3fSmrg 70101e04c3fSmrg evergreen_fill_buffer_resource_words(rctx, view->base.texture, 70201e04c3fSmrg ¶ms, &view->skip_mip_address_reloc, 70301e04c3fSmrg view->tex_resource_words); 70401e04c3fSmrg view->tex_resource = &tmp->resource; 705af69d88dSmrg 706af69d88dSmrg if (tmp->resource.gpu_address) 7077ec681f3Smrg list_addtail(&view->list, &rctx->texture_buffers); 708af69d88dSmrg return &view->base; 709af69d88dSmrg} 710af69d88dSmrg 71101e04c3fSmrgstruct eg_tex_res_params { 71201e04c3fSmrg enum pipe_format pipe_format; 71301e04c3fSmrg int force_level; 71401e04c3fSmrg unsigned width0; 71501e04c3fSmrg unsigned height0; 71601e04c3fSmrg unsigned first_level; 71701e04c3fSmrg unsigned last_level; 71801e04c3fSmrg unsigned first_layer; 71901e04c3fSmrg unsigned last_layer; 72001e04c3fSmrg unsigned target; 72101e04c3fSmrg unsigned char swizzle[4]; 72201e04c3fSmrg}; 72301e04c3fSmrg 
72401e04c3fSmrgstatic int evergreen_fill_tex_resource_words(struct r600_context *rctx, 72501e04c3fSmrg struct pipe_resource *texture, 72601e04c3fSmrg struct eg_tex_res_params *params, 72701e04c3fSmrg bool *skip_mip_address_reloc, 72801e04c3fSmrg unsigned tex_resource_words[8]) 729af69d88dSmrg{ 73001e04c3fSmrg struct r600_screen *rscreen = (struct r600_screen*)rctx->b.b.screen; 731af69d88dSmrg struct r600_texture *tmp = (struct r600_texture*)texture; 7323464ebd5Sriastradh unsigned format, endian; 7333464ebd5Sriastradh uint32_t word4 = 0, yuv_format = 0, pitch = 0; 73401e04c3fSmrg unsigned char array_mode = 0, non_disp_tiling = 0; 735af69d88dSmrg unsigned height, depth, width; 736af69d88dSmrg unsigned macro_aspect, tile_split, bankh, bankw, nbanks, fmask_bankh; 73701e04c3fSmrg struct legacy_surf_level *surflevel; 738af69d88dSmrg unsigned base_level, first_level, last_level; 73901e04c3fSmrg unsigned dim, last_layer; 740af69d88dSmrg uint64_t va; 74101e04c3fSmrg bool do_endian_swap = FALSE; 742af69d88dSmrg 74301e04c3fSmrg tile_split = tmp->surface.u.legacy.tile_split; 74401e04c3fSmrg surflevel = tmp->surface.u.legacy.level; 745af69d88dSmrg 746af69d88dSmrg /* Texturing with separate depth and stencil. */ 74701e04c3fSmrg if (tmp->db_compatible) { 74801e04c3fSmrg switch (params->pipe_format) { 749af69d88dSmrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 75001e04c3fSmrg params->pipe_format = PIPE_FORMAT_Z32_FLOAT; 751af69d88dSmrg break; 752af69d88dSmrg case PIPE_FORMAT_X8Z24_UNORM: 753af69d88dSmrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 75401e04c3fSmrg /* Z24 is always stored like this for DB 75501e04c3fSmrg * compatibility. 
75601e04c3fSmrg */ 75701e04c3fSmrg params->pipe_format = PIPE_FORMAT_Z24X8_UNORM; 758af69d88dSmrg break; 759af69d88dSmrg case PIPE_FORMAT_X24S8_UINT: 760af69d88dSmrg case PIPE_FORMAT_S8X24_UINT: 761af69d88dSmrg case PIPE_FORMAT_X32_S8X24_UINT: 76201e04c3fSmrg params->pipe_format = PIPE_FORMAT_S8_UINT; 76301e04c3fSmrg tile_split = tmp->surface.u.legacy.stencil_tile_split; 7647ec681f3Smrg surflevel = tmp->surface.u.legacy.zs.stencil_level; 765af69d88dSmrg break; 766af69d88dSmrg default:; 767af69d88dSmrg } 768af69d88dSmrg } 769af69d88dSmrg 77001e04c3fSmrg if (R600_BIG_ENDIAN) 77101e04c3fSmrg do_endian_swap = !tmp->db_compatible; 77201e04c3fSmrg 77301e04c3fSmrg format = r600_translate_texformat(rctx->b.b.screen, params->pipe_format, 77401e04c3fSmrg params->swizzle, 77501e04c3fSmrg &word4, &yuv_format, do_endian_swap); 776af69d88dSmrg assert(format != ~0); 7773464ebd5Sriastradh if (format == ~0) { 77801e04c3fSmrg return -1; 7793464ebd5Sriastradh } 7803464ebd5Sriastradh 78101e04c3fSmrg endian = r600_colorformat_endian_swap(format, do_endian_swap); 7823464ebd5Sriastradh 783af69d88dSmrg base_level = 0; 78401e04c3fSmrg first_level = params->first_level; 78501e04c3fSmrg last_level = params->last_level; 78601e04c3fSmrg width = params->width0; 78701e04c3fSmrg height = params->height0; 788af69d88dSmrg depth = texture->depth0; 789af69d88dSmrg 79001e04c3fSmrg if (params->force_level) { 79101e04c3fSmrg base_level = params->force_level; 792af69d88dSmrg first_level = 0; 793af69d88dSmrg last_level = 0; 79401e04c3fSmrg width = u_minify(width, params->force_level); 79501e04c3fSmrg height = u_minify(height, params->force_level); 79601e04c3fSmrg depth = u_minify(depth, params->force_level); 7973464ebd5Sriastradh } 7983464ebd5Sriastradh 79901e04c3fSmrg pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(params->pipe_format); 800af69d88dSmrg non_disp_tiling = tmp->non_disp_tiling; 801af69d88dSmrg 802af69d88dSmrg switch (surflevel[base_level].mode) { 80301e04c3fSmrg default: 
804af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 805af69d88dSmrg array_mode = V_028C70_ARRAY_LINEAR_ALIGNED; 806af69d88dSmrg break; 807af69d88dSmrg case RADEON_SURF_MODE_2D: 808af69d88dSmrg array_mode = V_028C70_ARRAY_2D_TILED_THIN1; 809af69d88dSmrg break; 810af69d88dSmrg case RADEON_SURF_MODE_1D: 811af69d88dSmrg array_mode = V_028C70_ARRAY_1D_TILED_THIN1; 812af69d88dSmrg break; 8133464ebd5Sriastradh } 81401e04c3fSmrg macro_aspect = tmp->surface.u.legacy.mtilea; 81501e04c3fSmrg bankw = tmp->surface.u.legacy.bankw; 81601e04c3fSmrg bankh = tmp->surface.u.legacy.bankh; 817af69d88dSmrg tile_split = eg_tile_split(tile_split); 818af69d88dSmrg macro_aspect = eg_macro_tile_aspect(macro_aspect); 819af69d88dSmrg bankw = eg_bank_wh(bankw); 820af69d88dSmrg bankh = eg_bank_wh(bankh); 821af69d88dSmrg fmask_bankh = eg_bank_wh(tmp->fmask.bank_height); 822af69d88dSmrg 823af69d88dSmrg /* 128 bit formats require tile type = 1 */ 824af69d88dSmrg if (rscreen->b.chip_class == CAYMAN) { 82501e04c3fSmrg if (util_format_get_blocksize(params->pipe_format) >= 16) 826af69d88dSmrg non_disp_tiling = 1; 827af69d88dSmrg } 82801e04c3fSmrg nbanks = eg_num_banks(rscreen->b.info.r600_num_banks); 82901e04c3fSmrg 83001e04c3fSmrg 83101e04c3fSmrg va = tmp->resource.gpu_address; 83201e04c3fSmrg 83301e04c3fSmrg /* array type views and views into array types need to use layer offset */ 83401e04c3fSmrg dim = r600_tex_dim(tmp, params->target, texture->nr_samples); 835af69d88dSmrg 83601e04c3fSmrg if (dim == V_030000_SQ_TEX_DIM_1D_ARRAY) { 837af69d88dSmrg height = 1; 838af69d88dSmrg depth = texture->array_size; 83901e04c3fSmrg } else if (dim == V_030000_SQ_TEX_DIM_2D_ARRAY || 84001e04c3fSmrg dim == V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA) { 841af69d88dSmrg depth = texture->array_size; 84201e04c3fSmrg } else if (dim == V_030000_SQ_TEX_DIM_CUBEMAP) 843af69d88dSmrg depth = texture->array_size / 6; 844af69d88dSmrg 84501e04c3fSmrg tex_resource_words[0] = (S_030000_DIM(dim) | 84601e04c3fSmrg S_030000_PITCH((pitch / 8) 
- 1) | 84701e04c3fSmrg S_030000_TEX_WIDTH(width - 1)); 848af69d88dSmrg if (rscreen->b.chip_class == CAYMAN) 84901e04c3fSmrg tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(non_disp_tiling); 850af69d88dSmrg else 85101e04c3fSmrg tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(non_disp_tiling); 85201e04c3fSmrg tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) | 853af69d88dSmrg S_030004_TEX_DEPTH(depth - 1) | 854af69d88dSmrg S_030004_ARRAY_MODE(array_mode)); 8557ec681f3Smrg tex_resource_words[2] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8; 856af69d88dSmrg 85701e04c3fSmrg *skip_mip_address_reloc = false; 858af69d88dSmrg /* TEX_RESOURCE_WORD3.MIP_ADDRESS */ 859af69d88dSmrg if (texture->nr_samples > 1 && rscreen->has_compressed_msaa_texturing) { 860af69d88dSmrg if (tmp->is_depth) { 861af69d88dSmrg /* disable FMASK (0 = disabled) */ 86201e04c3fSmrg tex_resource_words[3] = 0; 86301e04c3fSmrg *skip_mip_address_reloc = true; 864af69d88dSmrg } else { 865af69d88dSmrg /* FMASK should be in MIP_ADDRESS for multisample textures */ 86601e04c3fSmrg tex_resource_words[3] = (tmp->fmask.offset + va) >> 8; 8673464ebd5Sriastradh } 868af69d88dSmrg } else if (last_level && texture->nr_samples <= 1) { 8697ec681f3Smrg tex_resource_words[3] = ((uint64_t)surflevel[1].offset_256B * 256 + va) >> 8; 870af69d88dSmrg } else { 8717ec681f3Smrg tex_resource_words[3] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8; 8723464ebd5Sriastradh } 8733464ebd5Sriastradh 87401e04c3fSmrg last_layer = params->last_layer; 87501e04c3fSmrg if (params->target != texture->target && depth == 1) { 87601e04c3fSmrg last_layer = params->first_layer; 87701e04c3fSmrg } 87801e04c3fSmrg tex_resource_words[4] = (word4 | 87901e04c3fSmrg S_030010_ENDIAN_SWAP(endian)); 88001e04c3fSmrg tex_resource_words[5] = S_030014_BASE_ARRAY(params->first_layer) | 88101e04c3fSmrg S_030014_LAST_ARRAY(last_layer); 88201e04c3fSmrg tex_resource_words[6] = S_030018_TILE_SPLIT(tile_split); 
8833464ebd5Sriastradh 884af69d88dSmrg if (texture->nr_samples > 1) { 885af69d88dSmrg unsigned log_samples = util_logbase2(texture->nr_samples); 886af69d88dSmrg if (rscreen->b.chip_class == CAYMAN) { 88701e04c3fSmrg tex_resource_words[4] |= S_030010_LOG2_NUM_FRAGMENTS(log_samples); 888af69d88dSmrg } 889af69d88dSmrg /* LAST_LEVEL holds log2(nr_samples) for multisample textures */ 89001e04c3fSmrg tex_resource_words[5] |= S_030014_LAST_LEVEL(log_samples); 89101e04c3fSmrg tex_resource_words[6] |= S_030018_FMASK_BANK_HEIGHT(fmask_bankh); 892af69d88dSmrg } else { 89301e04c3fSmrg bool no_mip = first_level == last_level; 89401e04c3fSmrg 89501e04c3fSmrg tex_resource_words[4] |= S_030010_BASE_LEVEL(first_level); 89601e04c3fSmrg tex_resource_words[5] |= S_030014_LAST_LEVEL(last_level); 897af69d88dSmrg /* aniso max 16 samples */ 89801e04c3fSmrg tex_resource_words[6] |= S_030018_MAX_ANISO_RATIO(no_mip ? 0 : 4); 8993464ebd5Sriastradh } 900af69d88dSmrg 90101e04c3fSmrg tex_resource_words[7] = S_03001C_DATA_FORMAT(format) | 902af69d88dSmrg S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE) | 903af69d88dSmrg S_03001C_BANK_WIDTH(bankw) | 904af69d88dSmrg S_03001C_BANK_HEIGHT(bankh) | 905af69d88dSmrg S_03001C_MACRO_TILE_ASPECT(macro_aspect) | 906af69d88dSmrg S_03001C_NUM_BANKS(nbanks) | 90701e04c3fSmrg S_03001C_DEPTH_SAMPLE_ORDER(tmp->db_compatible); 90801e04c3fSmrg return 0; 90901e04c3fSmrg} 91001e04c3fSmrg 91101e04c3fSmrgstruct pipe_sampler_view * 91201e04c3fSmrgevergreen_create_sampler_view_custom(struct pipe_context *ctx, 91301e04c3fSmrg struct pipe_resource *texture, 91401e04c3fSmrg const struct pipe_sampler_view *state, 91501e04c3fSmrg unsigned width0, unsigned height0, 91601e04c3fSmrg unsigned force_level) 91701e04c3fSmrg{ 91801e04c3fSmrg struct r600_context *rctx = (struct r600_context*)ctx; 91901e04c3fSmrg struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view); 92001e04c3fSmrg struct r600_texture *tmp = (struct r600_texture*)texture; 92101e04c3fSmrg struct 
eg_tex_res_params params; 92201e04c3fSmrg int ret; 92301e04c3fSmrg 92401e04c3fSmrg if (!view) 92501e04c3fSmrg return NULL; 92601e04c3fSmrg 92701e04c3fSmrg /* initialize base object */ 92801e04c3fSmrg view->base = *state; 92901e04c3fSmrg view->base.texture = NULL; 93001e04c3fSmrg pipe_reference(NULL, &texture->reference); 93101e04c3fSmrg view->base.texture = texture; 93201e04c3fSmrg view->base.reference.count = 1; 93301e04c3fSmrg view->base.context = ctx; 93401e04c3fSmrg 93501e04c3fSmrg if (state->target == PIPE_BUFFER) 93601e04c3fSmrg return texture_buffer_sampler_view(rctx, view, width0, height0); 93701e04c3fSmrg 93801e04c3fSmrg memset(¶ms, 0, sizeof(params)); 93901e04c3fSmrg params.pipe_format = state->format; 94001e04c3fSmrg params.force_level = force_level; 94101e04c3fSmrg params.width0 = width0; 94201e04c3fSmrg params.height0 = height0; 94301e04c3fSmrg params.first_level = state->u.tex.first_level; 94401e04c3fSmrg params.last_level = state->u.tex.last_level; 94501e04c3fSmrg params.first_layer = state->u.tex.first_layer; 94601e04c3fSmrg params.last_layer = state->u.tex.last_layer; 94701e04c3fSmrg params.target = state->target; 94801e04c3fSmrg params.swizzle[0] = state->swizzle_r; 94901e04c3fSmrg params.swizzle[1] = state->swizzle_g; 95001e04c3fSmrg params.swizzle[2] = state->swizzle_b; 95101e04c3fSmrg params.swizzle[3] = state->swizzle_a; 95201e04c3fSmrg 95301e04c3fSmrg ret = evergreen_fill_tex_resource_words(rctx, texture, ¶ms, 95401e04c3fSmrg &view->skip_mip_address_reloc, 95501e04c3fSmrg view->tex_resource_words); 95601e04c3fSmrg if (ret != 0) { 95701e04c3fSmrg FREE(view); 95801e04c3fSmrg return NULL; 95901e04c3fSmrg } 96001e04c3fSmrg 96101e04c3fSmrg if (state->format == PIPE_FORMAT_X24S8_UINT || 96201e04c3fSmrg state->format == PIPE_FORMAT_S8X24_UINT || 96301e04c3fSmrg state->format == PIPE_FORMAT_X32_S8X24_UINT || 96401e04c3fSmrg state->format == PIPE_FORMAT_S8_UINT) 96501e04c3fSmrg view->is_stencil_sampler = true; 96601e04c3fSmrg 96701e04c3fSmrg 
view->tex_resource = &tmp->resource; 96801e04c3fSmrg 969af69d88dSmrg return &view->base; 9703464ebd5Sriastradh} 9713464ebd5Sriastradh 972af69d88dSmrgstatic struct pipe_sampler_view * 973af69d88dSmrgevergreen_create_sampler_view(struct pipe_context *ctx, 974af69d88dSmrg struct pipe_resource *tex, 975af69d88dSmrg const struct pipe_sampler_view *state) 9763464ebd5Sriastradh{ 977af69d88dSmrg return evergreen_create_sampler_view_custom(ctx, tex, state, 978af69d88dSmrg tex->width0, tex->height0, 0); 9793464ebd5Sriastradh} 9803464ebd5Sriastradh 98101e04c3fSmrgstatic void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) 98201e04c3fSmrg{ 9837ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 98401e04c3fSmrg struct r600_config_state *a = (struct r600_config_state*)atom; 98501e04c3fSmrg 98601e04c3fSmrg radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3); 98701e04c3fSmrg if (a->dyn_gpr_enabled) { 98801e04c3fSmrg radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs)); 98901e04c3fSmrg radeon_emit(cs, 0); 99001e04c3fSmrg radeon_emit(cs, 0); 99101e04c3fSmrg } else { 99201e04c3fSmrg radeon_emit(cs, a->sq_gpr_resource_mgmt_1); 99301e04c3fSmrg radeon_emit(cs, a->sq_gpr_resource_mgmt_2); 99401e04c3fSmrg radeon_emit(cs, a->sq_gpr_resource_mgmt_3); 99501e04c3fSmrg } 99601e04c3fSmrg radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8)); 99701e04c3fSmrg if (a->dyn_gpr_enabled) { 99801e04c3fSmrg radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 99901e04c3fSmrg S_028838_PS_GPRS(0x1e) | 100001e04c3fSmrg S_028838_VS_GPRS(0x1e) | 100101e04c3fSmrg S_028838_GS_GPRS(0x1e) | 100201e04c3fSmrg S_028838_ES_GPRS(0x1e) | 100301e04c3fSmrg S_028838_HS_GPRS(0x1e) | 100401e04c3fSmrg S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ 100501e04c3fSmrg } 100601e04c3fSmrg} 100701e04c3fSmrg 
1008af69d88dSmrgstatic void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) 10093464ebd5Sriastradh{ 10107ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1011af69d88dSmrg struct pipe_clip_state *state = &rctx->clip_state.state; 10123464ebd5Sriastradh 101301e04c3fSmrg radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4); 1014af69d88dSmrg radeon_emit_array(cs, (unsigned*)state, 6*4); 10153464ebd5Sriastradh} 10163464ebd5Sriastradh 10173464ebd5Sriastradhstatic void evergreen_set_polygon_stipple(struct pipe_context *ctx, 10183464ebd5Sriastradh const struct pipe_poly_stipple *state) 10193464ebd5Sriastradh{ 10203464ebd5Sriastradh} 10213464ebd5Sriastradh 1022af69d88dSmrgstatic void evergreen_get_scissor_rect(struct r600_context *rctx, 1023af69d88dSmrg unsigned tl_x, unsigned tl_y, unsigned br_x, unsigned br_y, 1024af69d88dSmrg uint32_t *tl, uint32_t *br) 10253464ebd5Sriastradh{ 102601e04c3fSmrg struct pipe_scissor_state scissor = {tl_x, tl_y, br_x, br_y}; 10273464ebd5Sriastradh 102801e04c3fSmrg evergreen_apply_scissor_bug_workaround(&rctx->b, &scissor); 10293464ebd5Sriastradh 103001e04c3fSmrg *tl = S_028240_TL_X(scissor.minx) | S_028240_TL_Y(scissor.miny); 103101e04c3fSmrg *br = S_028244_BR_X(scissor.maxx) | S_028244_BR_Y(scissor.maxy); 1032af69d88dSmrg} 10333464ebd5Sriastradh 103401e04c3fSmrgstruct r600_tex_color_info { 103501e04c3fSmrg unsigned info; 103601e04c3fSmrg unsigned view; 103701e04c3fSmrg unsigned dim; 103801e04c3fSmrg unsigned pitch; 103901e04c3fSmrg unsigned slice; 104001e04c3fSmrg unsigned attrib; 104101e04c3fSmrg unsigned ntype; 104201e04c3fSmrg unsigned fmask; 104301e04c3fSmrg unsigned fmask_slice; 104401e04c3fSmrg uint64_t offset; 104501e04c3fSmrg boolean export_16bpc; 104601e04c3fSmrg}; 10473464ebd5Sriastradh 104801e04c3fSmrgstatic void evergreen_set_color_surface_buffer(struct r600_context *rctx, 104901e04c3fSmrg struct r600_resource *res, 105001e04c3fSmrg enum pipe_format pformat, 105101e04c3fSmrg unsigned 
first_element, 105201e04c3fSmrg unsigned last_element, 105301e04c3fSmrg struct r600_tex_color_info *color) 10543464ebd5Sriastradh{ 105501e04c3fSmrg unsigned format, swap, ntype, endian; 105601e04c3fSmrg const struct util_format_description *desc; 105701e04c3fSmrg unsigned block_size = util_format_get_blocksize(res->b.b.format); 1058af69d88dSmrg unsigned pitch_alignment = 105901e04c3fSmrg MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size); 106001e04c3fSmrg unsigned pitch = align(res->b.b.width0, pitch_alignment); 106101e04c3fSmrg int i; 106201e04c3fSmrg unsigned width_elements; 10633464ebd5Sriastradh 106401e04c3fSmrg width_elements = last_element - first_element + 1; 10653464ebd5Sriastradh 106601e04c3fSmrg format = r600_translate_colorformat(rctx->b.chip_class, pformat, FALSE); 106701e04c3fSmrg swap = r600_translate_colorswap(pformat, FALSE); 1068af69d88dSmrg 106901e04c3fSmrg endian = r600_colorformat_endian_swap(format, FALSE); 1070af69d88dSmrg 107101e04c3fSmrg desc = util_format_description(pformat); 107201e04c3fSmrg for (i = 0; i < 4; i++) { 107301e04c3fSmrg if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 107401e04c3fSmrg break; 107501e04c3fSmrg } 107601e04c3fSmrg } 107701e04c3fSmrg ntype = V_028C70_NUMBER_UNORM; 107801e04c3fSmrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 107901e04c3fSmrg ntype = V_028C70_NUMBER_SRGB; 108001e04c3fSmrg else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 108101e04c3fSmrg if (desc->channel[i].normalized) 108201e04c3fSmrg ntype = V_028C70_NUMBER_SNORM; 108301e04c3fSmrg else if (desc->channel[i].pure_integer) 108401e04c3fSmrg ntype = V_028C70_NUMBER_SINT; 108501e04c3fSmrg } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 108601e04c3fSmrg if (desc->channel[i].normalized) 108701e04c3fSmrg ntype = V_028C70_NUMBER_UNORM; 108801e04c3fSmrg else if (desc->channel[i].pure_integer) 108901e04c3fSmrg ntype = V_028C70_NUMBER_UINT; 109001e04c3fSmrg } else if (desc->channel[i].type == 
UTIL_FORMAT_TYPE_FLOAT) { 109101e04c3fSmrg ntype = V_028C70_NUMBER_FLOAT; 109201e04c3fSmrg } 1093af69d88dSmrg 109401e04c3fSmrg pitch = (pitch / 8) - 1; 109501e04c3fSmrg color->pitch = S_028C64_PITCH_TILE_MAX(pitch); 109601e04c3fSmrg 109701e04c3fSmrg color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED); 109801e04c3fSmrg color->info |= S_028C70_FORMAT(format) | 109901e04c3fSmrg S_028C70_COMP_SWAP(swap) | 110001e04c3fSmrg S_028C70_BLEND_CLAMP(0) | 110101e04c3fSmrg S_028C70_BLEND_BYPASS(1) | 110201e04c3fSmrg S_028C70_NUMBER_TYPE(ntype) | 110301e04c3fSmrg S_028C70_ENDIAN(endian); 110401e04c3fSmrg color->attrib = S_028C74_NON_DISP_TILING_ORDER(1); 110501e04c3fSmrg color->ntype = ntype; 110601e04c3fSmrg color->export_16bpc = false; 110701e04c3fSmrg color->dim = width_elements - 1; 110801e04c3fSmrg color->slice = 0; /* (width_elements / 64) - 1;*/ 110901e04c3fSmrg color->view = 0; 111001e04c3fSmrg color->offset = (res->gpu_address + first_element) >> 8; 111101e04c3fSmrg 111201e04c3fSmrg color->fmask = color->offset; 111301e04c3fSmrg color->fmask_slice = 0; 11143464ebd5Sriastradh} 11153464ebd5Sriastradh 111601e04c3fSmrgstatic void evergreen_set_color_surface_common(struct r600_context *rctx, 111701e04c3fSmrg struct r600_texture *rtex, 111801e04c3fSmrg unsigned level, 111901e04c3fSmrg unsigned first_layer, 112001e04c3fSmrg unsigned last_layer, 112101e04c3fSmrg enum pipe_format pformat, 112201e04c3fSmrg struct r600_tex_color_info *color) 11233464ebd5Sriastradh{ 1124af69d88dSmrg struct r600_screen *rscreen = rctx->screen; 11253464ebd5Sriastradh unsigned pitch, slice; 1126af69d88dSmrg unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks; 112701e04c3fSmrg unsigned format, swap, ntype, endian; 11283464ebd5Sriastradh const struct util_format_description *desc; 112901e04c3fSmrg bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE; 11303464ebd5Sriastradh int i; 113101e04c3fSmrg 11327ec681f3Smrg color->offset = 
(uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; 113301e04c3fSmrg color->view = S_028C6C_SLICE_START(first_layer) | 113401e04c3fSmrg S_028C6C_SLICE_MAX(last_layer); 113501e04c3fSmrg 113601e04c3fSmrg color->offset += rtex->resource.gpu_address; 113701e04c3fSmrg color->offset >>= 8; 113801e04c3fSmrg 113901e04c3fSmrg color->dim = 0; 114001e04c3fSmrg pitch = (rtex->surface.u.legacy.level[level].nblk_x) / 8 - 1; 114101e04c3fSmrg slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64; 1142af69d88dSmrg if (slice) { 1143af69d88dSmrg slice = slice - 1; 11443464ebd5Sriastradh } 114501e04c3fSmrg 114601e04c3fSmrg color->info = 0; 114701e04c3fSmrg switch (rtex->surface.u.legacy.level[level].mode) { 114801e04c3fSmrg default: 1149af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 115001e04c3fSmrg color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED); 1151af69d88dSmrg non_disp_tiling = 1; 1152af69d88dSmrg break; 1153af69d88dSmrg case RADEON_SURF_MODE_1D: 115401e04c3fSmrg color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_1D_TILED_THIN1); 1155af69d88dSmrg non_disp_tiling = rtex->non_disp_tiling; 1156af69d88dSmrg break; 1157af69d88dSmrg case RADEON_SURF_MODE_2D: 115801e04c3fSmrg color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_2D_TILED_THIN1); 1159af69d88dSmrg non_disp_tiling = rtex->non_disp_tiling; 1160af69d88dSmrg break; 1161af69d88dSmrg } 116201e04c3fSmrg tile_split = rtex->surface.u.legacy.tile_split; 116301e04c3fSmrg macro_aspect = rtex->surface.u.legacy.mtilea; 116401e04c3fSmrg bankw = rtex->surface.u.legacy.bankw; 116501e04c3fSmrg bankh = rtex->surface.u.legacy.bankh; 116601e04c3fSmrg if (rtex->fmask.size) 116701e04c3fSmrg fmask_bankh = rtex->fmask.bank_height; 116801e04c3fSmrg else 116901e04c3fSmrg fmask_bankh = rtex->surface.u.legacy.bankh; 1170af69d88dSmrg tile_split = eg_tile_split(tile_split); 1171af69d88dSmrg macro_aspect = eg_macro_tile_aspect(macro_aspect); 1172af69d88dSmrg bankw = 
eg_bank_wh(bankw); 1173af69d88dSmrg bankh = eg_bank_wh(bankh); 1174af69d88dSmrg fmask_bankh = eg_bank_wh(fmask_bankh); 1175af69d88dSmrg 1176af69d88dSmrg if (rscreen->b.chip_class == CAYMAN) { 117701e04c3fSmrg if (util_format_get_blocksize(pformat) >= 16) 1178af69d88dSmrg non_disp_tiling = 1; 1179af69d88dSmrg } 118001e04c3fSmrg nbanks = eg_num_banks(rscreen->b.info.r600_num_banks); 118101e04c3fSmrg desc = util_format_description(pformat); 11823464ebd5Sriastradh for (i = 0; i < 4; i++) { 11833464ebd5Sriastradh if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 11843464ebd5Sriastradh break; 11853464ebd5Sriastradh } 11863464ebd5Sriastradh } 118701e04c3fSmrg color->attrib = S_028C74_TILE_SPLIT(tile_split)| 118801e04c3fSmrg S_028C74_NUM_BANKS(nbanks) | 118901e04c3fSmrg S_028C74_BANK_WIDTH(bankw) | 119001e04c3fSmrg S_028C74_BANK_HEIGHT(bankh) | 119101e04c3fSmrg S_028C74_MACRO_TILE_ASPECT(macro_aspect) | 119201e04c3fSmrg S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) | 119301e04c3fSmrg S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 1194af69d88dSmrg 1195af69d88dSmrg if (rctx->b.chip_class == CAYMAN) { 119601e04c3fSmrg color->attrib |= S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == 119701e04c3fSmrg PIPE_SWIZZLE_1); 1198af69d88dSmrg 1199af69d88dSmrg if (rtex->resource.b.b.nr_samples > 1) { 1200af69d88dSmrg unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 120101e04c3fSmrg color->attrib |= S_028C74_NUM_SAMPLES(log_samples) | 1202af69d88dSmrg S_028C74_NUM_FRAGMENTS(log_samples); 1203af69d88dSmrg } 1204af69d88dSmrg } 1205af69d88dSmrg 12063464ebd5Sriastradh ntype = V_028C70_NUMBER_UNORM; 12073464ebd5Sriastradh if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 12083464ebd5Sriastradh ntype = V_028C70_NUMBER_SRGB; 1209af69d88dSmrg else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1210af69d88dSmrg if (desc->channel[i].normalized) 1211af69d88dSmrg ntype = V_028C70_NUMBER_SNORM; 1212af69d88dSmrg else if (desc->channel[i].pure_integer) 1213af69d88dSmrg 
ntype = V_028C70_NUMBER_SINT; 1214af69d88dSmrg } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1215af69d88dSmrg if (desc->channel[i].normalized) 1216af69d88dSmrg ntype = V_028C70_NUMBER_UNORM; 1217af69d88dSmrg else if (desc->channel[i].pure_integer) 1218af69d88dSmrg ntype = V_028C70_NUMBER_UINT; 121901e04c3fSmrg } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 122001e04c3fSmrg ntype = V_028C70_NUMBER_FLOAT; 1221af69d88dSmrg } 1222af69d88dSmrg 122301e04c3fSmrg if (R600_BIG_ENDIAN) 122401e04c3fSmrg do_endian_swap = !rtex->db_compatible; 12253464ebd5Sriastradh 122601e04c3fSmrg format = r600_translate_colorformat(rctx->b.chip_class, pformat, do_endian_swap); 122701e04c3fSmrg assert(format != ~0); 122801e04c3fSmrg swap = r600_translate_colorswap(pformat, do_endian_swap); 1229af69d88dSmrg assert(swap != ~0); 1230af69d88dSmrg 123101e04c3fSmrg endian = r600_colorformat_endian_swap(format, do_endian_swap); 12323464ebd5Sriastradh 1233af69d88dSmrg /* blend clamp should be set for all NORM/SRGB types */ 1234af69d88dSmrg if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || 1235af69d88dSmrg ntype == V_028C70_NUMBER_SRGB) 1236af69d88dSmrg blend_clamp = 1; 1237af69d88dSmrg 1238af69d88dSmrg /* set blend bypass according to docs if SINT/UINT or 1239af69d88dSmrg 8/24 COLOR variants */ 1240af69d88dSmrg if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 1241af69d88dSmrg format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 1242af69d88dSmrg format == V_028C70_COLOR_X24_8_32_FLOAT) { 1243af69d88dSmrg blend_clamp = 0; 1244af69d88dSmrg blend_bypass = 1; 1245af69d88dSmrg } 1246af69d88dSmrg 124701e04c3fSmrg color->ntype = ntype; 124801e04c3fSmrg color->info |= S_028C70_FORMAT(format) | 12493464ebd5Sriastradh S_028C70_COMP_SWAP(swap) | 1250af69d88dSmrg S_028C70_BLEND_CLAMP(blend_clamp) | 1251af69d88dSmrg S_028C70_BLEND_BYPASS(blend_bypass) | 125201e04c3fSmrg S_028C70_SIMPLE_FLOAT(1) | 12533464ebd5Sriastradh 
S_028C70_NUMBER_TYPE(ntype) | 12543464ebd5Sriastradh S_028C70_ENDIAN(endian); 12553464ebd5Sriastradh 125601e04c3fSmrg if (rtex->fmask.size) { 125701e04c3fSmrg color->info |= S_028C70_COMPRESSION(1); 125801e04c3fSmrg } 125901e04c3fSmrg 12607ec681f3Smrg /* EXPORT_NORM is an optimization that can be enabled for better 12613464ebd5Sriastradh * performance in certain cases. 12623464ebd5Sriastradh * EXPORT_NORM can be enabled if: 12633464ebd5Sriastradh * - 11-bit or smaller UNORM/SNORM/SRGB 12643464ebd5Sriastradh * - 16-bit or smaller FLOAT 12653464ebd5Sriastradh */ 126601e04c3fSmrg color->export_16bpc = false; 12673464ebd5Sriastradh if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && 12683464ebd5Sriastradh ((desc->channel[i].size < 12 && 12693464ebd5Sriastradh desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && 12703464ebd5Sriastradh ntype != V_028C70_NUMBER_UINT && ntype != V_028C70_NUMBER_SINT) || 12713464ebd5Sriastradh (desc->channel[i].size < 17 && 12723464ebd5Sriastradh desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))) { 127301e04c3fSmrg color->info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); 127401e04c3fSmrg color->export_16bpc = true; 1275af69d88dSmrg } 1276af69d88dSmrg 127701e04c3fSmrg color->pitch = S_028C64_PITCH_TILE_MAX(pitch); 127801e04c3fSmrg color->slice = S_028C68_SLICE_TILE_MAX(slice); 127901e04c3fSmrg 1280af69d88dSmrg if (rtex->fmask.size) { 128101e04c3fSmrg color->fmask = (rtex->resource.gpu_address + rtex->fmask.offset) >> 8; 128201e04c3fSmrg color->fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); 128301e04c3fSmrg } else { 128401e04c3fSmrg color->fmask = color->offset; 128501e04c3fSmrg color->fmask_slice = S_028C88_TILE_MAX(slice); 1286af69d88dSmrg } 128701e04c3fSmrg} 128801e04c3fSmrg 128901e04c3fSmrg/** 12907ec681f3Smrg * This function initializes the CB* register values for RATs. It is meant 129101e04c3fSmrg * to be used for 1D aligned buffers that do not have an associated 129201e04c3fSmrg * radeon_surf. 
129301e04c3fSmrg */ 129401e04c3fSmrgvoid evergreen_init_color_surface_rat(struct r600_context *rctx, 129501e04c3fSmrg struct r600_surface *surf) 129601e04c3fSmrg{ 129701e04c3fSmrg struct pipe_resource *pipe_buffer = surf->base.texture; 129801e04c3fSmrg struct r600_tex_color_info color; 129901e04c3fSmrg 130001e04c3fSmrg evergreen_set_color_surface_buffer(rctx, (struct r600_resource *)surf->base.texture, 130101e04c3fSmrg surf->base.format, 0, pipe_buffer->width0, 130201e04c3fSmrg &color); 130301e04c3fSmrg 130401e04c3fSmrg surf->cb_color_base = color.offset; 130501e04c3fSmrg surf->cb_color_dim = color.dim; 130601e04c3fSmrg surf->cb_color_info = color.info | S_028C70_RAT(1); 130701e04c3fSmrg surf->cb_color_pitch = color.pitch; 130801e04c3fSmrg surf->cb_color_slice = color.slice; 130901e04c3fSmrg surf->cb_color_view = color.view; 131001e04c3fSmrg surf->cb_color_attrib = color.attrib; 131101e04c3fSmrg surf->cb_color_fmask = color.fmask; 131201e04c3fSmrg surf->cb_color_fmask_slice = color.fmask_slice; 131301e04c3fSmrg 131401e04c3fSmrg surf->cb_color_view = 0; 1315af69d88dSmrg 131601e04c3fSmrg /* Set the buffer range the GPU will have access to: */ 13177ec681f3Smrg util_range_add(pipe_buffer, &r600_resource(pipe_buffer)->valid_buffer_range, 131801e04c3fSmrg 0, pipe_buffer->width0); 131901e04c3fSmrg} 132001e04c3fSmrg 132101e04c3fSmrg 132201e04c3fSmrgvoid evergreen_init_color_surface(struct r600_context *rctx, 132301e04c3fSmrg struct r600_surface *surf) 132401e04c3fSmrg{ 132501e04c3fSmrg struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 132601e04c3fSmrg unsigned level = surf->base.u.tex.level; 132701e04c3fSmrg struct r600_tex_color_info color; 132801e04c3fSmrg 132901e04c3fSmrg evergreen_set_color_surface_common(rctx, rtex, level, 133001e04c3fSmrg surf->base.u.tex.first_layer, 133101e04c3fSmrg surf->base.u.tex.last_layer, 133201e04c3fSmrg surf->base.format, 133301e04c3fSmrg &color); 133401e04c3fSmrg 133501e04c3fSmrg surf->alphatest_bypass = color.ntype == 
V_028C70_NUMBER_UINT || 133601e04c3fSmrg color.ntype == V_028C70_NUMBER_SINT; 133701e04c3fSmrg surf->export_16bpc = color.export_16bpc; 1338af69d88dSmrg 1339af69d88dSmrg /* XXX handle enabling of CB beyond BASE8 which has different offset */ 134001e04c3fSmrg surf->cb_color_base = color.offset; 134101e04c3fSmrg surf->cb_color_dim = color.dim; 134201e04c3fSmrg surf->cb_color_info = color.info; 134301e04c3fSmrg surf->cb_color_pitch = color.pitch; 134401e04c3fSmrg surf->cb_color_slice = color.slice; 134501e04c3fSmrg surf->cb_color_view = color.view; 134601e04c3fSmrg surf->cb_color_attrib = color.attrib; 134701e04c3fSmrg surf->cb_color_fmask = color.fmask; 134801e04c3fSmrg surf->cb_color_fmask_slice = color.fmask_slice; 1349af69d88dSmrg 1350af69d88dSmrg surf->color_initialized = true; 1351af69d88dSmrg} 1352af69d88dSmrg 1353af69d88dSmrgstatic void evergreen_init_depth_surface(struct r600_context *rctx, 1354af69d88dSmrg struct r600_surface *surf) 1355af69d88dSmrg{ 1356af69d88dSmrg struct r600_screen *rscreen = rctx->screen; 1357af69d88dSmrg struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 135801e04c3fSmrg unsigned level = surf->base.u.tex.level; 135901e04c3fSmrg struct legacy_surf_level *levelinfo = &rtex->surface.u.legacy.level[level]; 1360af69d88dSmrg uint64_t offset; 136101e04c3fSmrg unsigned format, array_mode; 1362af69d88dSmrg unsigned macro_aspect, tile_split, bankh, bankw, nbanks; 1363af69d88dSmrg 136401e04c3fSmrg 1365af69d88dSmrg format = r600_translate_dbformat(surf->base.format); 1366af69d88dSmrg assert(format != ~0); 1367af69d88dSmrg 1368af69d88dSmrg offset = rtex->resource.gpu_address; 13697ec681f3Smrg offset += (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; 137001e04c3fSmrg 137101e04c3fSmrg switch (rtex->surface.u.legacy.level[level].mode) { 1372af69d88dSmrg case RADEON_SURF_MODE_2D: 1373af69d88dSmrg array_mode = V_028C70_ARRAY_2D_TILED_THIN1; 1374af69d88dSmrg break; 1375af69d88dSmrg case RADEON_SURF_MODE_1D: 
1376af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 1377af69d88dSmrg default: 1378af69d88dSmrg array_mode = V_028C70_ARRAY_1D_TILED_THIN1; 1379af69d88dSmrg break; 1380af69d88dSmrg } 138101e04c3fSmrg tile_split = rtex->surface.u.legacy.tile_split; 138201e04c3fSmrg macro_aspect = rtex->surface.u.legacy.mtilea; 138301e04c3fSmrg bankw = rtex->surface.u.legacy.bankw; 138401e04c3fSmrg bankh = rtex->surface.u.legacy.bankh; 1385af69d88dSmrg tile_split = eg_tile_split(tile_split); 1386af69d88dSmrg macro_aspect = eg_macro_tile_aspect(macro_aspect); 1387af69d88dSmrg bankw = eg_bank_wh(bankw); 1388af69d88dSmrg bankh = eg_bank_wh(bankh); 138901e04c3fSmrg nbanks = eg_num_banks(rscreen->b.info.r600_num_banks); 1390af69d88dSmrg offset >>= 8; 1391af69d88dSmrg 1392af69d88dSmrg surf->db_z_info = S_028040_ARRAY_MODE(array_mode) | 1393af69d88dSmrg S_028040_FORMAT(format) | 1394af69d88dSmrg S_028040_TILE_SPLIT(tile_split)| 1395af69d88dSmrg S_028040_NUM_BANKS(nbanks) | 1396af69d88dSmrg S_028040_BANK_WIDTH(bankw) | 1397af69d88dSmrg S_028040_BANK_HEIGHT(bankh) | 1398af69d88dSmrg S_028040_MACRO_TILE_ASPECT(macro_aspect); 1399af69d88dSmrg if (rscreen->b.chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) { 1400af69d88dSmrg surf->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 1401af69d88dSmrg } 140201e04c3fSmrg 140301e04c3fSmrg assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 140401e04c3fSmrg 1405af69d88dSmrg surf->db_depth_base = offset; 1406af69d88dSmrg surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 1407af69d88dSmrg S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 140801e04c3fSmrg surf->db_depth_size = S_028058_PITCH_TILE_MAX(levelinfo->nblk_x / 8 - 1) | 140901e04c3fSmrg S_028058_HEIGHT_TILE_MAX(levelinfo->nblk_y / 8 - 1); 141001e04c3fSmrg surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(levelinfo->nblk_x * 141101e04c3fSmrg levelinfo->nblk_y / 64 - 1); 1412af69d88dSmrg 141301e04c3fSmrg if 
(rtex->surface.has_stencil) { 1414af69d88dSmrg uint64_t stencil_offset; 141501e04c3fSmrg unsigned stile_split = rtex->surface.u.legacy.stencil_tile_split; 1416af69d88dSmrg 1417af69d88dSmrg stile_split = eg_tile_split(stile_split); 1418af69d88dSmrg 14197ec681f3Smrg stencil_offset = (uint64_t)rtex->surface.u.legacy.zs.stencil_level[level].offset_256B * 256; 1420af69d88dSmrg stencil_offset += rtex->resource.gpu_address; 1421af69d88dSmrg 1422af69d88dSmrg surf->db_stencil_base = stencil_offset >> 8; 1423af69d88dSmrg surf->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8) | 1424af69d88dSmrg S_028044_TILE_SPLIT(stile_split); 1425af69d88dSmrg } else { 1426af69d88dSmrg surf->db_stencil_base = offset; 1427af69d88dSmrg /* DRM 2.6.18 allows the INVALID format to disable stencil. 1428af69d88dSmrg * Older kernels are out of luck. */ 1429af69d88dSmrg surf->db_stencil_info = rctx->screen->b.info.drm_minor >= 18 ? 1430af69d88dSmrg S_028044_FORMAT(V_028044_STENCIL_INVALID) : 1431af69d88dSmrg S_028044_FORMAT(V_028044_STENCIL_8); 1432af69d88dSmrg } 1433af69d88dSmrg 143401e04c3fSmrg if (r600_htile_enabled(rtex, level)) { 143501e04c3fSmrg uint64_t va = rtex->resource.gpu_address + rtex->htile_offset; 1436af69d88dSmrg surf->db_htile_data_base = va >> 8; 1437af69d88dSmrg surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) | 143801e04c3fSmrg S_028ABC_HTILE_HEIGHT(1) | 143901e04c3fSmrg S_028ABC_FULL_CACHE(1); 1440af69d88dSmrg surf->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1); 1441af69d88dSmrg surf->db_preload_control = 0; 1442af69d88dSmrg } 1443af69d88dSmrg 1444af69d88dSmrg surf->depth_initialized = true; 1445af69d88dSmrg} 1446af69d88dSmrg 1447af69d88dSmrgstatic void evergreen_set_framebuffer_state(struct pipe_context *ctx, 1448af69d88dSmrg const struct pipe_framebuffer_state *state) 1449af69d88dSmrg{ 1450af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 14513464ebd5Sriastradh struct r600_surface *surf; 1452af69d88dSmrg struct r600_texture *rtex; 1453af69d88dSmrg 
uint32_t i, log_samples; 145401e04c3fSmrg uint32_t target_mask = 0; 145501e04c3fSmrg /* Flush TC when changing the framebuffer state, because the only 145601e04c3fSmrg * client not using TC that can change textures is the framebuffer. 145701e04c3fSmrg * Other places don't typically have to flush TC. 145801e04c3fSmrg */ 145901e04c3fSmrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | 146001e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV | 146101e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_CB | 146201e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_CB_META | 146301e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_DB | 146401e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_DB_META | 146501e04c3fSmrg R600_CONTEXT_INV_TEX_CACHE; 14663464ebd5Sriastradh 1467af69d88dSmrg util_copy_framebuffer_state(&rctx->framebuffer.state, state); 14683464ebd5Sriastradh 1469af69d88dSmrg /* Colorbuffers. */ 1470af69d88dSmrg rctx->framebuffer.export_16bpc = state->nr_cbufs != 0; 1471af69d88dSmrg rctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 1472af69d88dSmrg util_format_is_pure_integer(state->cbufs[0]->format); 1473af69d88dSmrg rctx->framebuffer.compressed_cb_mask = 0; 1474af69d88dSmrg rctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 14753464ebd5Sriastradh 1476af69d88dSmrg for (i = 0; i < state->nr_cbufs; i++) { 1477af69d88dSmrg surf = (struct r600_surface*)state->cbufs[i]; 1478af69d88dSmrg if (!surf) 1479af69d88dSmrg continue; 14803464ebd5Sriastradh 148101e04c3fSmrg target_mask |= (0xf << (i * 4)); 148201e04c3fSmrg 1483af69d88dSmrg rtex = (struct r600_texture*)surf->base.texture; 14843464ebd5Sriastradh 1485af69d88dSmrg r600_context_add_resource_size(ctx, state->cbufs[i]->texture); 14863464ebd5Sriastradh 1487af69d88dSmrg if (!surf->color_initialized) { 1488af69d88dSmrg evergreen_init_color_surface(rctx, surf); 1489af69d88dSmrg } 14903464ebd5Sriastradh 1491af69d88dSmrg if (!surf->export_16bpc) { 1492af69d88dSmrg rctx->framebuffer.export_16bpc = false; 1493af69d88dSmrg } 14943464ebd5Sriastradh 
149501e04c3fSmrg if (rtex->fmask.size) { 1496af69d88dSmrg rctx->framebuffer.compressed_cb_mask |= 1 << i; 1497af69d88dSmrg } 14983464ebd5Sriastradh } 14993464ebd5Sriastradh 1500af69d88dSmrg /* Update alpha-test state dependencies. 1501af69d88dSmrg * Alpha-test is done on the first colorbuffer only. */ 1502af69d88dSmrg if (state->nr_cbufs) { 1503af69d88dSmrg bool alphatest_bypass = false; 1504af69d88dSmrg bool export_16bpc = true; 1505af69d88dSmrg 1506af69d88dSmrg surf = (struct r600_surface*)state->cbufs[0]; 1507af69d88dSmrg if (surf) { 1508af69d88dSmrg alphatest_bypass = surf->alphatest_bypass; 1509af69d88dSmrg export_16bpc = surf->export_16bpc; 1510af69d88dSmrg } 1511af69d88dSmrg 1512af69d88dSmrg if (rctx->alphatest_state.bypass != alphatest_bypass) { 1513af69d88dSmrg rctx->alphatest_state.bypass = alphatest_bypass; 151401e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1515af69d88dSmrg } 1516af69d88dSmrg if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) { 1517af69d88dSmrg rctx->alphatest_state.cb0_export_16bpc = export_16bpc; 151801e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1519af69d88dSmrg } 1520af69d88dSmrg } 1521af69d88dSmrg 1522af69d88dSmrg /* ZS buffer. 
*/ 1523af69d88dSmrg if (state->zsbuf) { 1524af69d88dSmrg surf = (struct r600_surface*)state->zsbuf; 15253464ebd5Sriastradh 1526af69d88dSmrg r600_context_add_resource_size(ctx, state->zsbuf->texture); 1527af69d88dSmrg 1528af69d88dSmrg if (!surf->depth_initialized) { 1529af69d88dSmrg evergreen_init_depth_surface(rctx, surf); 1530af69d88dSmrg } 1531af69d88dSmrg 1532af69d88dSmrg if (state->zsbuf->format != rctx->poly_offset_state.zs_format) { 1533af69d88dSmrg rctx->poly_offset_state.zs_format = state->zsbuf->format; 153401e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom); 1535af69d88dSmrg } 1536af69d88dSmrg 1537af69d88dSmrg if (rctx->db_state.rsurf != surf) { 1538af69d88dSmrg rctx->db_state.rsurf = surf; 153901e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_state.atom); 154001e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 1541af69d88dSmrg } 1542af69d88dSmrg } else if (rctx->db_state.rsurf) { 1543af69d88dSmrg rctx->db_state.rsurf = NULL; 154401e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_state.atom); 154501e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 1546af69d88dSmrg } 1547af69d88dSmrg 154801e04c3fSmrg if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs || 154901e04c3fSmrg rctx->cb_misc_state.bound_cbufs_target_mask != target_mask) { 155001e04c3fSmrg rctx->cb_misc_state.bound_cbufs_target_mask = target_mask; 1551af69d88dSmrg rctx->cb_misc_state.nr_cbufs = state->nr_cbufs; 155201e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 1553af69d88dSmrg } 1554af69d88dSmrg 1555af69d88dSmrg if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) { 1556af69d88dSmrg rctx->alphatest_state.bypass = false; 155701e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1558af69d88dSmrg } 1559af69d88dSmrg 1560af69d88dSmrg log_samples = util_logbase2(rctx->framebuffer.nr_samples); 1561af69d88dSmrg /* This is for Cayman to program SAMPLE_RATE, and for RV770 to fix a hw bug. 
*/ 1562af69d88dSmrg if ((rctx->b.chip_class == CAYMAN || 1563af69d88dSmrg rctx->b.family == CHIP_RV770) && 1564af69d88dSmrg rctx->db_misc_state.log_samples != log_samples) { 1565af69d88dSmrg rctx->db_misc_state.log_samples = log_samples; 156601e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 1567af69d88dSmrg } 1568af69d88dSmrg 1569af69d88dSmrg 1570af69d88dSmrg /* Calculate the CS size. */ 1571af69d88dSmrg rctx->framebuffer.atom.num_dw = 4; /* SCISSOR */ 1572af69d88dSmrg 1573af69d88dSmrg /* MSAA. */ 1574af69d88dSmrg if (rctx->b.chip_class == EVERGREEN) 157501e04c3fSmrg rctx->framebuffer.atom.num_dw += 17; /* Evergreen */ 1576af69d88dSmrg else 1577af69d88dSmrg rctx->framebuffer.atom.num_dw += 28; /* Cayman */ 1578af69d88dSmrg 1579af69d88dSmrg /* Colorbuffers. */ 1580af69d88dSmrg rctx->framebuffer.atom.num_dw += state->nr_cbufs * 23; 158101e04c3fSmrg rctx->framebuffer.atom.num_dw += state->nr_cbufs * 2; 1582af69d88dSmrg rctx->framebuffer.atom.num_dw += (12 - state->nr_cbufs) * 3; 1583af69d88dSmrg 1584af69d88dSmrg /* ZS buffer. 
*/ 1585af69d88dSmrg if (state->zsbuf) { 1586af69d88dSmrg rctx->framebuffer.atom.num_dw += 24; 158701e04c3fSmrg rctx->framebuffer.atom.num_dw += 2; 1588af69d88dSmrg } else if (rctx->screen->b.info.drm_minor >= 18) { 1589af69d88dSmrg rctx->framebuffer.atom.num_dw += 4; 1590af69d88dSmrg } 1591af69d88dSmrg 159201e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 159301e04c3fSmrg 159401e04c3fSmrg r600_set_sample_locations_constant_buffer(rctx); 159501e04c3fSmrg rctx->framebuffer.do_update_surf_dirtiness = true; 15963464ebd5Sriastradh} 15973464ebd5Sriastradh 159801e04c3fSmrgstatic void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 159901e04c3fSmrg{ 160001e04c3fSmrg struct r600_context *rctx = (struct r600_context *)ctx; 1601af69d88dSmrg 160201e04c3fSmrg if (rctx->ps_iter_samples == min_samples) 160301e04c3fSmrg return; 160401e04c3fSmrg 160501e04c3fSmrg rctx->ps_iter_samples = min_samples; 160601e04c3fSmrg if (rctx->framebuffer.nr_samples > 1) { 160701e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 160801e04c3fSmrg } 160901e04c3fSmrg} 161001e04c3fSmrg 161101e04c3fSmrg/* 8xMSAA */ 161201e04c3fSmrgstatic const uint32_t sample_locs_8x[] = { 161301e04c3fSmrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1614af69d88dSmrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1615af69d88dSmrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1616af69d88dSmrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1617af69d88dSmrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1618af69d88dSmrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1619af69d88dSmrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1620af69d88dSmrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1621af69d88dSmrg}; 1622af69d88dSmrgstatic unsigned max_dist_8x = 7; 1623af69d88dSmrg 1624af69d88dSmrgstatic void evergreen_get_sample_position(struct pipe_context *ctx, 1625af69d88dSmrg unsigned sample_count, 1626af69d88dSmrg unsigned sample_index, 1627af69d88dSmrg float *out_value) 16283464ebd5Sriastradh{ 1629af69d88dSmrg int offset, index; 
1630af69d88dSmrg struct { 1631af69d88dSmrg int idx:4; 1632af69d88dSmrg } val; 1633af69d88dSmrg switch (sample_count) { 1634af69d88dSmrg case 1: 1635af69d88dSmrg default: 1636af69d88dSmrg out_value[0] = out_value[1] = 0.5; 1637af69d88dSmrg break; 1638af69d88dSmrg case 2: 1639af69d88dSmrg offset = 4 * (sample_index * 2); 1640af69d88dSmrg val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf; 1641af69d88dSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1642af69d88dSmrg val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf; 1643af69d88dSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1644af69d88dSmrg break; 1645af69d88dSmrg case 4: 1646af69d88dSmrg offset = 4 * (sample_index * 2); 1647af69d88dSmrg val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf; 1648af69d88dSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1649af69d88dSmrg val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf; 1650af69d88dSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1651af69d88dSmrg break; 1652af69d88dSmrg case 8: 1653af69d88dSmrg offset = 4 * (sample_index % 4 * 2); 1654af69d88dSmrg index = (sample_index / 4); 1655af69d88dSmrg val.idx = (sample_locs_8x[index] >> offset) & 0xf; 1656af69d88dSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1657af69d88dSmrg val.idx = (sample_locs_8x[index] >> (offset + 4)) & 0xf; 1658af69d88dSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1659af69d88dSmrg break; 1660af69d88dSmrg } 1661af69d88dSmrg} 16623464ebd5Sriastradh 166301e04c3fSmrgstatic void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples) 1664af69d88dSmrg{ 1665af69d88dSmrg 16667ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1667af69d88dSmrg unsigned max_dist = 0; 1668af69d88dSmrg 1669af69d88dSmrg switch (nr_samples) { 1670af69d88dSmrg default: 1671af69d88dSmrg nr_samples = 0; 1672af69d88dSmrg break; 1673af69d88dSmrg case 2: 167401e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(eg_sample_locs_2x)); 167501e04c3fSmrg 
radeon_emit_array(cs, eg_sample_locs_2x, ARRAY_SIZE(eg_sample_locs_2x)); 1676af69d88dSmrg max_dist = eg_max_dist_2x; 1677af69d88dSmrg break; 1678af69d88dSmrg case 4: 167901e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(eg_sample_locs_4x)); 168001e04c3fSmrg radeon_emit_array(cs, eg_sample_locs_4x, ARRAY_SIZE(eg_sample_locs_4x)); 1681af69d88dSmrg max_dist = eg_max_dist_4x; 1682af69d88dSmrg break; 1683af69d88dSmrg case 8: 168401e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(sample_locs_8x)); 168501e04c3fSmrg radeon_emit_array(cs, sample_locs_8x, ARRAY_SIZE(sample_locs_8x)); 1686af69d88dSmrg max_dist = max_dist_8x; 1687af69d88dSmrg break; 1688af69d88dSmrg } 1689af69d88dSmrg 1690af69d88dSmrg if (nr_samples > 1) { 169101e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2); 1692af69d88dSmrg radeon_emit(cs, S_028C00_LAST_PIXEL(1) | 1693af69d88dSmrg S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */ 1694af69d88dSmrg radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) | 1695af69d88dSmrg S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */ 169601e04c3fSmrg radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, 169701e04c3fSmrg EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) | 169801e04c3fSmrg EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 169901e04c3fSmrg EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1)); 1700af69d88dSmrg } else { 170101e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2); 1702af69d88dSmrg radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */ 1703af69d88dSmrg radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */ 170401e04c3fSmrg radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, 170501e04c3fSmrg EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 170601e04c3fSmrg EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1)); 170701e04c3fSmrg } 170801e04c3fSmrg} 170901e04c3fSmrg 171001e04c3fSmrgstatic void evergreen_emit_image_state(struct 
r600_context *rctx, struct r600_atom *atom, 171101e04c3fSmrg int immed_id_base, int res_id_base, int offset, uint32_t pkt_flags) 171201e04c3fSmrg{ 171301e04c3fSmrg struct r600_image_state *state = (struct r600_image_state *)atom; 171401e04c3fSmrg struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state; 17157ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 171601e04c3fSmrg struct r600_texture *rtex; 171701e04c3fSmrg struct r600_resource *resource; 171801e04c3fSmrg int i; 171901e04c3fSmrg 172001e04c3fSmrg for (i = 0; i < R600_MAX_IMAGES; i++) { 172101e04c3fSmrg struct r600_image_view *image = &state->views[i]; 172201e04c3fSmrg unsigned reloc, immed_reloc; 172301e04c3fSmrg int idx = i + offset; 172401e04c3fSmrg 172501e04c3fSmrg if (!pkt_flags) 172601e04c3fSmrg idx += fb_state->nr_cbufs + (rctx->dual_src_blend ? 1 : 0); 172701e04c3fSmrg if (!image->base.resource) 172801e04c3fSmrg continue; 172901e04c3fSmrg 173001e04c3fSmrg resource = (struct r600_resource *)image->base.resource; 173101e04c3fSmrg if (resource->b.b.target != PIPE_BUFFER) 173201e04c3fSmrg rtex = (struct r600_texture *)image->base.resource; 173301e04c3fSmrg else 173401e04c3fSmrg rtex = NULL; 173501e04c3fSmrg 173601e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, 173701e04c3fSmrg &rctx->b.gfx, 173801e04c3fSmrg resource, 173901e04c3fSmrg RADEON_USAGE_READWRITE, 174001e04c3fSmrg RADEON_PRIO_SHADER_RW_BUFFER); 174101e04c3fSmrg 174201e04c3fSmrg immed_reloc = radeon_add_to_buffer_list(&rctx->b, 174301e04c3fSmrg &rctx->b.gfx, 174401e04c3fSmrg resource->immed_buffer, 174501e04c3fSmrg RADEON_USAGE_READWRITE, 174601e04c3fSmrg RADEON_PRIO_SHADER_RW_BUFFER); 174701e04c3fSmrg 174801e04c3fSmrg if (pkt_flags) 174901e04c3fSmrg radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13); 175001e04c3fSmrg else 175101e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13); 175201e04c3fSmrg 175301e04c3fSmrg radeon_emit(cs, image->cb_color_base); /* 
R_028C60_CB_COLOR0_BASE */ 175401e04c3fSmrg radeon_emit(cs, image->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 175501e04c3fSmrg radeon_emit(cs, image->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 175601e04c3fSmrg radeon_emit(cs, image->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 175701e04c3fSmrg radeon_emit(cs, image->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 175801e04c3fSmrg radeon_emit(cs, image->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 175901e04c3fSmrg radeon_emit(cs, image->cb_color_dim); /* R_028C78_CB_COLOR0_DIM */ 176001e04c3fSmrg radeon_emit(cs, rtex ? rtex->cmask.base_address_reg : image->cb_color_base); /* R_028C7C_CB_COLOR0_CMASK */ 176101e04c3fSmrg radeon_emit(cs, rtex ? rtex->cmask.slice_tile_max : 0); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 176201e04c3fSmrg radeon_emit(cs, image->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 176301e04c3fSmrg radeon_emit(cs, image->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 176401e04c3fSmrg radeon_emit(cs, rtex ? rtex->color_clear_value[0] : 0); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 176501e04c3fSmrg radeon_emit(cs, rtex ? 
rtex->color_clear_value[1] : 0); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 176601e04c3fSmrg 176701e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */ 176801e04c3fSmrg radeon_emit(cs, reloc); 176901e04c3fSmrg 177001e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */ 177101e04c3fSmrg radeon_emit(cs, reloc); 177201e04c3fSmrg 177301e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C7C_CB_COLOR0_CMASK */ 177401e04c3fSmrg radeon_emit(cs, reloc); 177501e04c3fSmrg 177601e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C84_CB_COLOR0_FMASK */ 177701e04c3fSmrg radeon_emit(cs, reloc); 177801e04c3fSmrg 177901e04c3fSmrg if (pkt_flags) 178001e04c3fSmrg radeon_compute_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8); 178101e04c3fSmrg else 178201e04c3fSmrg radeon_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8); 178301e04c3fSmrg 178401e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /**/ 178501e04c3fSmrg radeon_emit(cs, immed_reloc); 178601e04c3fSmrg 178701e04c3fSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); 178801e04c3fSmrg radeon_emit(cs, (immed_id_base + i + offset) * 8); 178901e04c3fSmrg radeon_emit_array(cs, image->immed_resource_words, 8); 179001e04c3fSmrg 179101e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 179201e04c3fSmrg radeon_emit(cs, immed_reloc); 179301e04c3fSmrg 179401e04c3fSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); 179501e04c3fSmrg radeon_emit(cs, (res_id_base + i + offset) * 8); 179601e04c3fSmrg radeon_emit_array(cs, image->resource_words, 8); 179701e04c3fSmrg 179801e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 179901e04c3fSmrg radeon_emit(cs, reloc); 180001e04c3fSmrg 180101e04c3fSmrg if (!image->skip_mip_address_reloc) { 180201e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 180301e04c3fSmrg radeon_emit(cs, 
reloc); 180401e04c3fSmrg } 1805af69d88dSmrg } 1806af69d88dSmrg} 18073464ebd5Sriastradh 180801e04c3fSmrgstatic void evergreen_emit_fragment_image_state(struct r600_context *rctx, struct r600_atom *atom) 180901e04c3fSmrg{ 181001e04c3fSmrg evergreen_emit_image_state(rctx, atom, 181101e04c3fSmrg R600_IMAGE_IMMED_RESOURCE_OFFSET, 181201e04c3fSmrg R600_IMAGE_REAL_RESOURCE_OFFSET, 0, 0); 181301e04c3fSmrg} 181401e04c3fSmrg 181501e04c3fSmrgstatic void evergreen_emit_compute_image_state(struct r600_context *rctx, struct r600_atom *atom) 181601e04c3fSmrg{ 181701e04c3fSmrg evergreen_emit_image_state(rctx, atom, 181801e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET, 181901e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET, 182001e04c3fSmrg 0, RADEON_CP_PACKET3_COMPUTE_MODE); 182101e04c3fSmrg} 182201e04c3fSmrg 182301e04c3fSmrgstatic void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, struct r600_atom *atom) 182401e04c3fSmrg{ 182501e04c3fSmrg int offset = util_bitcount(rctx->fragment_images.enabled_mask); 182601e04c3fSmrg evergreen_emit_image_state(rctx, atom, 182701e04c3fSmrg R600_IMAGE_IMMED_RESOURCE_OFFSET, 182801e04c3fSmrg R600_IMAGE_REAL_RESOURCE_OFFSET, offset, 0); 182901e04c3fSmrg} 183001e04c3fSmrg 183101e04c3fSmrgstatic void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struct r600_atom *atom) 183201e04c3fSmrg{ 183301e04c3fSmrg int offset = util_bitcount(rctx->compute_images.enabled_mask); 183401e04c3fSmrg evergreen_emit_image_state(rctx, atom, 183501e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET, 183601e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET, 183701e04c3fSmrg offset, RADEON_CP_PACKET3_COMPUTE_MODE); 183801e04c3fSmrg} 183901e04c3fSmrg 1840af69d88dSmrgstatic void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) 1841af69d88dSmrg{ 18427ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 
1843af69d88dSmrg struct pipe_framebuffer_state *state = &rctx->framebuffer.state; 1844af69d88dSmrg unsigned nr_cbufs = state->nr_cbufs; 1845af69d88dSmrg unsigned i, tl, br; 1846af69d88dSmrg struct r600_texture *tex = NULL; 1847af69d88dSmrg struct r600_surface *cb = NULL; 1848af69d88dSmrg 1849af69d88dSmrg /* XXX support more colorbuffers once we need them */ 1850af69d88dSmrg assert(nr_cbufs <= 8); 1851af69d88dSmrg if (nr_cbufs > 8) 1852af69d88dSmrg nr_cbufs = 8; 1853af69d88dSmrg 1854af69d88dSmrg /* Colorbuffers. */ 1855af69d88dSmrg for (i = 0; i < nr_cbufs; i++) { 1856af69d88dSmrg unsigned reloc, cmask_reloc; 1857af69d88dSmrg 1858af69d88dSmrg cb = (struct r600_surface*)state->cbufs[i]; 1859af69d88dSmrg if (!cb) { 186001e04c3fSmrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 1861af69d88dSmrg S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 1862af69d88dSmrg continue; 1863af69d88dSmrg } 1864af69d88dSmrg 1865af69d88dSmrg tex = (struct r600_texture *)cb->base.texture; 186601e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, 186701e04c3fSmrg &rctx->b.gfx, 1868af69d88dSmrg (struct r600_resource*)cb->base.texture, 1869af69d88dSmrg RADEON_USAGE_READWRITE, 187001e04c3fSmrg tex->resource.b.b.nr_samples > 1 ? 
1871af69d88dSmrg RADEON_PRIO_COLOR_BUFFER_MSAA : 1872af69d88dSmrg RADEON_PRIO_COLOR_BUFFER); 1873af69d88dSmrg 1874af69d88dSmrg if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 187501e04c3fSmrg cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 1876af69d88dSmrg tex->cmask_buffer, RADEON_USAGE_READWRITE, 187701e04c3fSmrg RADEON_PRIO_SEPARATE_META); 1878af69d88dSmrg } else { 1879af69d88dSmrg cmask_reloc = reloc; 1880af69d88dSmrg } 18813464ebd5Sriastradh 188201e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); 1883af69d88dSmrg radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 1884af69d88dSmrg radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 1885af69d88dSmrg radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 1886af69d88dSmrg radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 1887af69d88dSmrg radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 1888af69d88dSmrg radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 1889af69d88dSmrg radeon_emit(cs, cb->cb_color_dim); /* R_028C78_CB_COLOR0_DIM */ 1890af69d88dSmrg radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 1891af69d88dSmrg radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 1892af69d88dSmrg radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 1893af69d88dSmrg radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 1894af69d88dSmrg radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 1895af69d88dSmrg radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 1896af69d88dSmrg 1897af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */ 1898af69d88dSmrg radeon_emit(cs, reloc); 1899af69d88dSmrg 1900af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */ 
1901af69d88dSmrg radeon_emit(cs, reloc); 19023464ebd5Sriastradh 1903af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C7C_CB_COLOR0_CMASK */ 1904af69d88dSmrg radeon_emit(cs, cmask_reloc); 19053464ebd5Sriastradh 1906af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C84_CB_COLOR0_FMASK */ 1907af69d88dSmrg radeon_emit(cs, reloc); 19083464ebd5Sriastradh } 1909af69d88dSmrg /* set CB_COLOR1_INFO for possible dual-src blending */ 191001e04c3fSmrg if (rctx->framebuffer.dual_src_blend && i == 1 && state->cbufs[0]) { 191101e04c3fSmrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 1912af69d88dSmrg cb->cb_color_info | tex->cb_color_info); 1913af69d88dSmrg i++; 1914af69d88dSmrg } 191501e04c3fSmrg i += util_bitcount(rctx->fragment_images.enabled_mask); 191601e04c3fSmrg i += util_bitcount(rctx->fragment_buffers.enabled_mask); 191701e04c3fSmrg for (; i < 8 ; i++) 191801e04c3fSmrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 191901e04c3fSmrg for (; i < 12; i++) 192001e04c3fSmrg radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0); 1921af69d88dSmrg 1922af69d88dSmrg /* ZS buffer. */ 19233464ebd5Sriastradh if (state->zsbuf) { 1924af69d88dSmrg struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 192501e04c3fSmrg unsigned reloc = radeon_add_to_buffer_list(&rctx->b, 192601e04c3fSmrg &rctx->b.gfx, 1927af69d88dSmrg (struct r600_resource*)state->zsbuf->texture, 1928af69d88dSmrg RADEON_USAGE_READWRITE, 1929af69d88dSmrg zb->base.texture->nr_samples > 1 ? 
1930af69d88dSmrg RADEON_PRIO_DEPTH_BUFFER_MSAA : 1931af69d88dSmrg RADEON_PRIO_DEPTH_BUFFER); 1932af69d88dSmrg 193301e04c3fSmrg radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 1934af69d88dSmrg 193501e04c3fSmrg radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 8); 1936af69d88dSmrg radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */ 1937af69d88dSmrg radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 1938af69d88dSmrg radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 1939af69d88dSmrg radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 1940af69d88dSmrg radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 1941af69d88dSmrg radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 1942af69d88dSmrg radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 1943af69d88dSmrg radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 1944af69d88dSmrg 1945af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028048_DB_Z_READ_BASE */ 1946af69d88dSmrg radeon_emit(cs, reloc); 1947af69d88dSmrg 1948af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_02804C_DB_STENCIL_READ_BASE */ 1949af69d88dSmrg radeon_emit(cs, reloc); 1950af69d88dSmrg 1951af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028050_DB_Z_WRITE_BASE */ 1952af69d88dSmrg radeon_emit(cs, reloc); 1953af69d88dSmrg 1954af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028054_DB_STENCIL_WRITE_BASE */ 1955af69d88dSmrg radeon_emit(cs, reloc); 1956af69d88dSmrg } else if (rctx->screen->b.info.drm_minor >= 18) { 1957af69d88dSmrg /* DRM 2.6.18 allows the INVALID format to disable depth/stencil. 1958af69d88dSmrg * Older kernels are out of luck. 
*/ 195901e04c3fSmrg radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 1960af69d88dSmrg radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 1961af69d88dSmrg radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 19623464ebd5Sriastradh } 19633464ebd5Sriastradh 1964af69d88dSmrg /* Framebuffer dimensions. */ 1965af69d88dSmrg evergreen_get_scissor_rect(rctx, 0, 0, state->width, state->height, &tl, &br); 1966af69d88dSmrg 196701e04c3fSmrg radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2); 1968af69d88dSmrg radeon_emit(cs, tl); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */ 1969af69d88dSmrg radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */ 1970af69d88dSmrg 1971af69d88dSmrg if (rctx->b.chip_class == EVERGREEN) { 197201e04c3fSmrg evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples); 1973af69d88dSmrg } else { 197401e04c3fSmrg cayman_emit_msaa_state(cs, rctx->framebuffer.nr_samples, 197501e04c3fSmrg rctx->ps_iter_samples, 0); 19763464ebd5Sriastradh } 1977af69d88dSmrg} 1978af69d88dSmrg 1979af69d88dSmrgstatic void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) 1980af69d88dSmrg{ 19817ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1982af69d88dSmrg struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; 1983af69d88dSmrg float offset_units = state->offset_units; 1984af69d88dSmrg float offset_scale = state->offset_scale; 198501e04c3fSmrg uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 1986af69d88dSmrg 198701e04c3fSmrg if (!state->offset_units_unscaled) { 198801e04c3fSmrg switch (state->zs_format) { 198901e04c3fSmrg case PIPE_FORMAT_Z24X8_UNORM: 199001e04c3fSmrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 199101e04c3fSmrg case PIPE_FORMAT_X8Z24_UNORM: 199201e04c3fSmrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 199301e04c3fSmrg offset_units *= 2.0f; 199401e04c3fSmrg pa_su_poly_offset_db_fmt_cntl = 199501e04c3fSmrg 
S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24); 199601e04c3fSmrg break; 199701e04c3fSmrg case PIPE_FORMAT_Z16_UNORM: 199801e04c3fSmrg offset_units *= 4.0f; 199901e04c3fSmrg pa_su_poly_offset_db_fmt_cntl = 200001e04c3fSmrg S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16); 200101e04c3fSmrg break; 200201e04c3fSmrg default: 200301e04c3fSmrg pa_su_poly_offset_db_fmt_cntl = 200401e04c3fSmrg S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) | 200501e04c3fSmrg S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 200601e04c3fSmrg } 20073464ebd5Sriastradh } 2008af69d88dSmrg 200901e04c3fSmrg radeon_set_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4); 2010af69d88dSmrg radeon_emit(cs, fui(offset_scale)); 2011af69d88dSmrg radeon_emit(cs, fui(offset_units)); 2012af69d88dSmrg radeon_emit(cs, fui(offset_scale)); 2013af69d88dSmrg radeon_emit(cs, fui(offset_units)); 201401e04c3fSmrg 201501e04c3fSmrg radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 201601e04c3fSmrg pa_su_poly_offset_db_fmt_cntl); 201701e04c3fSmrg} 201801e04c3fSmrg 201901e04c3fSmrguint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a, 202001e04c3fSmrg unsigned nr_cbufs) 202101e04c3fSmrg{ 202201e04c3fSmrg unsigned base_mask = 0; 202301e04c3fSmrg unsigned dirty_mask = a->image_rat_enabled_mask; 202401e04c3fSmrg while (dirty_mask) { 202501e04c3fSmrg unsigned idx = u_bit_scan(&dirty_mask); 202601e04c3fSmrg base_mask |= (0xf << (idx * 4)); 202701e04c3fSmrg } 202801e04c3fSmrg unsigned offset = util_last_bit(a->image_rat_enabled_mask); 202901e04c3fSmrg dirty_mask = a->buffer_rat_enabled_mask; 203001e04c3fSmrg while (dirty_mask) { 203101e04c3fSmrg unsigned idx = u_bit_scan(&dirty_mask); 203201e04c3fSmrg base_mask |= (0xf << (idx + offset) * 4); 203301e04c3fSmrg } 203401e04c3fSmrg return base_mask << (nr_cbufs * 4); 2035af69d88dSmrg} 2036af69d88dSmrg 2037af69d88dSmrgstatic void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) 
2038af69d88dSmrg{ 20397ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2040af69d88dSmrg struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; 204101e04c3fSmrg unsigned fb_colormask = a->bound_cbufs_target_mask; 204201e04c3fSmrg unsigned ps_colormask = a->ps_color_export_mask; 204301e04c3fSmrg unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, a->nr_cbufs); 204401e04c3fSmrg radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); 204501e04c3fSmrg radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */ 204601e04c3fSmrg /* This must match the used export instructions exactly. 204701e04c3fSmrg * Other values may lead to undefined behavior and hangs. 204801e04c3fSmrg */ 204901e04c3fSmrg radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */ 2050af69d88dSmrg} 2051af69d88dSmrg 2052af69d88dSmrgstatic void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) 2053af69d88dSmrg{ 20547ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2055af69d88dSmrg struct r600_db_state *a = (struct r600_db_state*)atom; 2056af69d88dSmrg 2057af69d88dSmrg if (a->rsurf && a->rsurf->db_htile_surface) { 2058af69d88dSmrg struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture; 2059af69d88dSmrg unsigned reloc_idx; 2060af69d88dSmrg 206101e04c3fSmrg radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); 206201e04c3fSmrg radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); 206301e04c3fSmrg radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control); 206401e04c3fSmrg radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); 206501e04c3fSmrg reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource, 206601e04c3fSmrg RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META); 206701e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 206801e04c3fSmrg 
radeon_emit(cs, reloc_idx); 20693464ebd5Sriastradh } else { 207001e04c3fSmrg radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0); 207101e04c3fSmrg radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0); 20723464ebd5Sriastradh } 2073af69d88dSmrg} 20743464ebd5Sriastradh 2075af69d88dSmrgstatic void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) 2076af69d88dSmrg{ 20777ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2078af69d88dSmrg struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; 2079af69d88dSmrg unsigned db_render_control = 0; 2080af69d88dSmrg unsigned db_count_control = 0; 2081af69d88dSmrg unsigned db_render_override = 2082af69d88dSmrg S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 2083af69d88dSmrg S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); 20843464ebd5Sriastradh 208501e04c3fSmrg if (rctx->b.num_occlusion_queries > 0 && 208601e04c3fSmrg !a->occlusion_queries_disabled) { 2087af69d88dSmrg db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1); 2088af69d88dSmrg if (rctx->b.chip_class == CAYMAN) { 2089af69d88dSmrg db_count_control |= S_028004_SAMPLE_RATE(a->log_samples); 2090af69d88dSmrg } 2091af69d88dSmrg db_render_override |= S_02800C_NOOP_CULL_DISABLE(1); 2092af69d88dSmrg } else { 209301e04c3fSmrg db_count_control |= S_028004_ZPASS_INCREMENT_DISABLE(1); 2094af69d88dSmrg } 209501e04c3fSmrg 209601e04c3fSmrg /* This is to fix a lockup when hyperz and alpha test are enabled at 209701e04c3fSmrg * the same time somehow GPU get confuse on which order to pick for 209801e04c3fSmrg * z test 209901e04c3fSmrg */ 210001e04c3fSmrg if (rctx->alphatest_state.sx_alpha_test_control) 210101e04c3fSmrg db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1); 210201e04c3fSmrg 2103af69d88dSmrg if (a->flush_depthstencil_through_cb) { 2104af69d88dSmrg assert(a->copy_depth || a->copy_stencil); 2105af69d88dSmrg 2106af69d88dSmrg db_render_control |= S_028000_DEPTH_COPY_ENABLE(a->copy_depth) | 2107af69d88dSmrg 
S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) | 2108af69d88dSmrg S_028000_COPY_CENTROID(1) | 2109af69d88dSmrg S_028000_COPY_SAMPLE(a->copy_sample); 211001e04c3fSmrg } else if (a->flush_depth_inplace || a->flush_stencil_inplace) { 211101e04c3fSmrg db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) | 211201e04c3fSmrg S_028000_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace); 2113af69d88dSmrg db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1); 2114af69d88dSmrg } 2115af69d88dSmrg if (a->htile_clear) { 2116af69d88dSmrg /* FIXME we might want to disable cliprect here */ 2117af69d88dSmrg db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1); 2118af69d88dSmrg } 21193464ebd5Sriastradh 212001e04c3fSmrg radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 2121af69d88dSmrg radeon_emit(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */ 2122af69d88dSmrg radeon_emit(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL */ 212301e04c3fSmrg radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); 212401e04c3fSmrg radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control); 2125af69d88dSmrg} 2126af69d88dSmrg 2127af69d88dSmrgstatic void evergreen_emit_vertex_buffers(struct r600_context *rctx, 2128af69d88dSmrg struct r600_vertexbuf_state *state, 2129af69d88dSmrg unsigned resource_offset, 2130af69d88dSmrg unsigned pkt_flags) 2131af69d88dSmrg{ 21327ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2133af69d88dSmrg uint32_t dirty_mask = state->dirty_mask; 2134af69d88dSmrg 2135af69d88dSmrg while (dirty_mask) { 2136af69d88dSmrg struct pipe_vertex_buffer *vb; 2137af69d88dSmrg struct r600_resource *rbuffer; 2138af69d88dSmrg uint64_t va; 2139af69d88dSmrg unsigned buffer_index = u_bit_scan(&dirty_mask); 2140af69d88dSmrg 2141af69d88dSmrg vb = &state->vb[buffer_index]; 214201e04c3fSmrg rbuffer = (struct r600_resource*)vb->buffer.resource; 2143af69d88dSmrg assert(rbuffer); 2144af69d88dSmrg 2145af69d88dSmrg 
va = rbuffer->gpu_address + vb->buffer_offset; 2146af69d88dSmrg 2147af69d88dSmrg /* fetch resources start at index 992 */ 2148af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); 2149af69d88dSmrg radeon_emit(cs, (resource_offset + buffer_index) * 8); 2150af69d88dSmrg radeon_emit(cs, va); /* RESOURCEi_WORD0 */ 215101e04c3fSmrg radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */ 2152af69d88dSmrg radeon_emit(cs, /* RESOURCEi_WORD2 */ 2153af69d88dSmrg S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | 2154af69d88dSmrg S_030008_STRIDE(vb->stride) | 2155af69d88dSmrg S_030008_BASE_ADDRESS_HI(va >> 32UL)); 2156af69d88dSmrg radeon_emit(cs, /* RESOURCEi_WORD3 */ 2157af69d88dSmrg S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | 2158af69d88dSmrg S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | 2159af69d88dSmrg S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | 2160af69d88dSmrg S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W)); 2161af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD4 */ 2162af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD5 */ 2163af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD6 */ 2164af69d88dSmrg radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */ 2165af69d88dSmrg 2166af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 216701e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 216801e04c3fSmrg RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); 2169af69d88dSmrg } 2170af69d88dSmrg state->dirty_mask = 0; 2171af69d88dSmrg} 2172af69d88dSmrg 2173af69d88dSmrgstatic void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom) 2174af69d88dSmrg{ 217501e04c3fSmrg evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_FS, 0); 2176af69d88dSmrg} 2177af69d88dSmrg 2178af69d88dSmrgstatic void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom) 2179af69d88dSmrg{ 218001e04c3fSmrg evergreen_emit_vertex_buffers(rctx, 
&rctx->cs_vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_CS, 2181af69d88dSmrg RADEON_CP_PACKET3_COMPUTE_MODE); 2182af69d88dSmrg} 2183af69d88dSmrg 2184af69d88dSmrgstatic void evergreen_emit_constant_buffers(struct r600_context *rctx, 2185af69d88dSmrg struct r600_constbuf_state *state, 2186af69d88dSmrg unsigned buffer_id_base, 2187af69d88dSmrg unsigned reg_alu_constbuf_size, 2188af69d88dSmrg unsigned reg_alu_const_cache, 2189af69d88dSmrg unsigned pkt_flags) 2190af69d88dSmrg{ 21917ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2192af69d88dSmrg uint32_t dirty_mask = state->dirty_mask; 2193af69d88dSmrg 2194af69d88dSmrg while (dirty_mask) { 2195af69d88dSmrg struct pipe_constant_buffer *cb; 2196af69d88dSmrg struct r600_resource *rbuffer; 2197af69d88dSmrg uint64_t va; 2198af69d88dSmrg unsigned buffer_index = ffs(dirty_mask) - 1; 2199af69d88dSmrg unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER); 2200af69d88dSmrg 2201af69d88dSmrg cb = &state->cb[buffer_index]; 2202af69d88dSmrg rbuffer = (struct r600_resource*)cb->buffer; 2203af69d88dSmrg assert(rbuffer); 2204af69d88dSmrg 2205af69d88dSmrg va = rbuffer->gpu_address + cb->buffer_offset; 2206af69d88dSmrg 220701e04c3fSmrg if (buffer_index < R600_MAX_HW_CONST_BUFFERS) { 220801e04c3fSmrg radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4, 220901e04c3fSmrg DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags); 221001e04c3fSmrg radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8, 2211af69d88dSmrg pkt_flags); 221201e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 221301e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 221401e04c3fSmrg RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); 2215af69d88dSmrg } 2216af69d88dSmrg 2217af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); 2218af69d88dSmrg radeon_emit(cs, (buffer_id_base + buffer_index) * 8); 2219af69d88dSmrg radeon_emit(cs, va); /* 
RESOURCEi_WORD0 */ 222001e04c3fSmrg radeon_emit(cs, cb->buffer_size -1); /* RESOURCEi_WORD1 */ 2221af69d88dSmrg radeon_emit(cs, /* RESOURCEi_WORD2 */ 2222af69d88dSmrg S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) | 2223af69d88dSmrg S_030008_STRIDE(gs_ring_buffer ? 4 : 16) | 2224af69d88dSmrg S_030008_BASE_ADDRESS_HI(va >> 32UL) | 2225af69d88dSmrg S_030008_DATA_FORMAT(FMT_32_32_32_32_FLOAT)); 2226af69d88dSmrg radeon_emit(cs, /* RESOURCEi_WORD3 */ 2227af69d88dSmrg S_03000C_UNCACHED(gs_ring_buffer ? 1 : 0) | 2228af69d88dSmrg S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | 2229af69d88dSmrg S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | 2230af69d88dSmrg S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | 2231af69d88dSmrg S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W)); 2232af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD4 */ 2233af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD5 */ 2234af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD6 */ 2235af69d88dSmrg radeon_emit(cs, /* RESOURCEi_WORD7 */ 2236af69d88dSmrg S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER)); 2237af69d88dSmrg 2238af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 223901e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 224001e04c3fSmrg RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); 2241af69d88dSmrg 2242af69d88dSmrg dirty_mask &= ~(1 << buffer_index); 2243af69d88dSmrg } 2244af69d88dSmrg state->dirty_mask = 0; 2245af69d88dSmrg} 2246af69d88dSmrg 224701e04c3fSmrg/* VS constants can be in VS/ES (same space) or LS if tess is enabled */ 2248af69d88dSmrgstatic void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 2249af69d88dSmrg{ 225001e04c3fSmrg if (rctx->vs_shader->current->shader.vs_as_ls) { 225101e04c3fSmrg evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 225201e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_LS, 225301e04c3fSmrg R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 225401e04c3fSmrg 
R_028F40_ALU_CONST_CACHE_LS_0, 225501e04c3fSmrg 0 /* PKT3 flags */); 225601e04c3fSmrg } else { 225701e04c3fSmrg evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 225801e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_VS, 225901e04c3fSmrg R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 226001e04c3fSmrg R_028980_ALU_CONST_CACHE_VS_0, 226101e04c3fSmrg 0 /* PKT3 flags */); 226201e04c3fSmrg } 2263af69d88dSmrg} 2264af69d88dSmrg 2265af69d88dSmrgstatic void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 2266af69d88dSmrg{ 226701e04c3fSmrg evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 226801e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_GS, 2269af69d88dSmrg R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 2270af69d88dSmrg R_0289C0_ALU_CONST_CACHE_GS_0, 2271af69d88dSmrg 0 /* PKT3 flags */); 2272af69d88dSmrg} 2273af69d88dSmrg 2274af69d88dSmrgstatic void evergreen_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 2275af69d88dSmrg{ 227601e04c3fSmrg evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 227701e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_PS, 227801e04c3fSmrg R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 227901e04c3fSmrg R_028940_ALU_CONST_CACHE_PS_0, 228001e04c3fSmrg 0 /* PKT3 flags */); 2281af69d88dSmrg} 2282af69d88dSmrg 2283af69d88dSmrgstatic void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 2284af69d88dSmrg{ 228501e04c3fSmrg evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE], 228601e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_CS, 2287af69d88dSmrg R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 2288af69d88dSmrg R_028F40_ALU_CONST_CACHE_LS_0, 2289af69d88dSmrg RADEON_CP_PACKET3_COMPUTE_MODE); 2290af69d88dSmrg} 2291af69d88dSmrg 229201e04c3fSmrg/* tes constants can be emitted to VS or ES - which are common */ 229301e04c3fSmrgstatic void evergreen_emit_tes_constant_buffers(struct r600_context *rctx, struct r600_atom 
*atom) 229401e04c3fSmrg{ 229501e04c3fSmrg if (!rctx->tes_shader) 229601e04c3fSmrg return; 229701e04c3fSmrg evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_EVAL], 229801e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_VS, 229901e04c3fSmrg R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 230001e04c3fSmrg R_028980_ALU_CONST_CACHE_VS_0, 230101e04c3fSmrg 0); 230201e04c3fSmrg} 230301e04c3fSmrg 230401e04c3fSmrgstatic void evergreen_emit_tcs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 230501e04c3fSmrg{ 230601e04c3fSmrg if (!rctx->tes_shader) 230701e04c3fSmrg return; 230801e04c3fSmrg evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_CTRL], 230901e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_HS, 231001e04c3fSmrg R_028F80_ALU_CONST_BUFFER_SIZE_HS_0, 231101e04c3fSmrg R_028F00_ALU_CONST_CACHE_HS_0, 231201e04c3fSmrg 0); 231301e04c3fSmrg} 231401e04c3fSmrg 231501e04c3fSmrgvoid evergreen_setup_scratch_buffers(struct r600_context *rctx) { 231601e04c3fSmrg static const struct { 231701e04c3fSmrg unsigned ring_base; 231801e04c3fSmrg unsigned item_size; 231901e04c3fSmrg unsigned ring_size; 232001e04c3fSmrg } regs[EG_NUM_HW_STAGES] = { 232101e04c3fSmrg [R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE }, 232201e04c3fSmrg [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE }, 232301e04c3fSmrg [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE }, 232401e04c3fSmrg [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE }, 232501e04c3fSmrg [EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE }, 232601e04c3fSmrg [EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE } 232701e04c3fSmrg }; 
232801e04c3fSmrg 232901e04c3fSmrg for (unsigned i = 0; i < EG_NUM_HW_STAGES; i++) { 233001e04c3fSmrg struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader; 233101e04c3fSmrg 233201e04c3fSmrg if (stage && unlikely(stage->scratch_space_needed)) { 233301e04c3fSmrg r600_setup_scratch_area_for_shader(rctx, stage, 233401e04c3fSmrg &rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size); 233501e04c3fSmrg } 233601e04c3fSmrg } 233701e04c3fSmrg} 233801e04c3fSmrg 2339af69d88dSmrgstatic void evergreen_emit_sampler_views(struct r600_context *rctx, 2340af69d88dSmrg struct r600_samplerview_state *state, 234101e04c3fSmrg unsigned resource_id_base, unsigned pkt_flags) 2342af69d88dSmrg{ 23437ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2344af69d88dSmrg uint32_t dirty_mask = state->dirty_mask; 2345af69d88dSmrg 2346af69d88dSmrg while (dirty_mask) { 2347af69d88dSmrg struct r600_pipe_sampler_view *rview; 2348af69d88dSmrg unsigned resource_index = u_bit_scan(&dirty_mask); 2349af69d88dSmrg unsigned reloc; 2350af69d88dSmrg 2351af69d88dSmrg rview = state->views[resource_index]; 2352af69d88dSmrg assert(rview); 2353af69d88dSmrg 235401e04c3fSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); 2355af69d88dSmrg radeon_emit(cs, (resource_id_base + resource_index) * 8); 2356af69d88dSmrg radeon_emit_array(cs, rview->tex_resource_words, 8); 2357af69d88dSmrg 235801e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource, 2359af69d88dSmrg RADEON_USAGE_READ, 236001e04c3fSmrg r600_get_sampler_view_priority(rview->tex_resource)); 236101e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 2362af69d88dSmrg radeon_emit(cs, reloc); 2363af69d88dSmrg 2364af69d88dSmrg if (!rview->skip_mip_address_reloc) { 236501e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 2366af69d88dSmrg radeon_emit(cs, reloc); 2367af69d88dSmrg } 2368af69d88dSmrg } 2369af69d88dSmrg state->dirty_mask = 0; 2370af69d88dSmrg} 
2371af69d88dSmrg 2372af69d88dSmrgstatic void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 2373af69d88dSmrg{ 237401e04c3fSmrg if (rctx->vs_shader->current->shader.vs_as_ls) { 237501e04c3fSmrg evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 237601e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_LS + R600_MAX_CONST_BUFFERS, 0); 237701e04c3fSmrg } else { 237801e04c3fSmrg evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 237901e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0); 238001e04c3fSmrg } 2381af69d88dSmrg} 2382af69d88dSmrg 2383af69d88dSmrgstatic void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 2384af69d88dSmrg{ 238501e04c3fSmrg evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 238601e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS, 0); 238701e04c3fSmrg} 238801e04c3fSmrg 238901e04c3fSmrgstatic void evergreen_emit_tcs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 239001e04c3fSmrg{ 239101e04c3fSmrg evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].views, 239201e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_HS + R600_MAX_CONST_BUFFERS, 0); 239301e04c3fSmrg} 239401e04c3fSmrg 239501e04c3fSmrgstatic void evergreen_emit_tes_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 239601e04c3fSmrg{ 239701e04c3fSmrg if (!rctx->tes_shader) 239801e04c3fSmrg return; 239901e04c3fSmrg evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views, 240001e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0); 2401af69d88dSmrg} 2402af69d88dSmrg 2403af69d88dSmrgstatic void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 2404af69d88dSmrg{ 240501e04c3fSmrg evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, 240601e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_PS + 
R600_MAX_CONST_BUFFERS, 0); 240701e04c3fSmrg} 240801e04c3fSmrg 240901e04c3fSmrgstatic void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 241001e04c3fSmrg{ 241101e04c3fSmrg evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views, 241201e04c3fSmrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_MAX_CONST_BUFFERS, RADEON_CP_PACKET3_COMPUTE_MODE); 241301e04c3fSmrg} 241401e04c3fSmrg 241501e04c3fSmrgstatic void evergreen_convert_border_color(union pipe_color_union *in, 241601e04c3fSmrg union pipe_color_union *out, 241701e04c3fSmrg enum pipe_format format) 241801e04c3fSmrg{ 241901e04c3fSmrg if (util_format_is_pure_integer(format) && 242001e04c3fSmrg !util_format_is_depth_or_stencil(format)) { 242101e04c3fSmrg const struct util_format_description *d = util_format_description(format); 242201e04c3fSmrg 242301e04c3fSmrg for (int i = 0; i < d->nr_channels; ++i) { 242401e04c3fSmrg int cs = d->channel[i].size; 242501e04c3fSmrg if (d->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) 242601e04c3fSmrg out->f[i] = (double)(in->i[i]) / ((1ul << (cs - 1)) - 1 ); 242701e04c3fSmrg else if (d->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) 242801e04c3fSmrg out->f[i] = (double)(in->ui[i]) / ((1ul << cs) - 1 ); 242901e04c3fSmrg else 243001e04c3fSmrg out->f[i] = 0; 243101e04c3fSmrg } 243201e04c3fSmrg 243301e04c3fSmrg } else { 243401e04c3fSmrg switch (format) { 243501e04c3fSmrg case PIPE_FORMAT_X24S8_UINT: 243601e04c3fSmrg case PIPE_FORMAT_X32_S8X24_UINT: 243701e04c3fSmrg out->f[0] = (double)(in->ui[0]) / 255.0; 243801e04c3fSmrg out->f[1] = out->f[2] = out->f[3] = 0.0f; 243901e04c3fSmrg break; 244001e04c3fSmrg default: 244101e04c3fSmrg memcpy(out->f, in->f, 4 * sizeof(float)); 244201e04c3fSmrg } 244301e04c3fSmrg } 2444af69d88dSmrg} 2445af69d88dSmrg 2446af69d88dSmrgstatic void evergreen_emit_sampler_states(struct r600_context *rctx, 2447af69d88dSmrg struct r600_textures_info *texinfo, 2448af69d88dSmrg unsigned resource_id_base, 244901e04c3fSmrg 
unsigned border_index_reg, 245001e04c3fSmrg unsigned pkt_flags) 2451af69d88dSmrg{ 24527ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2453af69d88dSmrg uint32_t dirty_mask = texinfo->states.dirty_mask; 245401e04c3fSmrg union pipe_color_union border_color = {{0,0,0,1}}; 245501e04c3fSmrg union pipe_color_union *border_color_ptr = &border_color; 2456af69d88dSmrg 2457af69d88dSmrg while (dirty_mask) { 2458af69d88dSmrg struct r600_pipe_sampler_state *rstate; 2459af69d88dSmrg unsigned i = u_bit_scan(&dirty_mask); 2460af69d88dSmrg 2461af69d88dSmrg rstate = texinfo->states.states[i]; 2462af69d88dSmrg assert(rstate); 2463af69d88dSmrg 246401e04c3fSmrg if (rstate->border_color_use) { 246501e04c3fSmrg struct r600_pipe_sampler_view *rview = texinfo->views.views[i]; 246601e04c3fSmrg if (rview) { 246701e04c3fSmrg evergreen_convert_border_color(&rstate->border_color, 246801e04c3fSmrg &border_color, rview->base.format); 246901e04c3fSmrg } else { 247001e04c3fSmrg border_color_ptr = &rstate->border_color; 247101e04c3fSmrg } 247201e04c3fSmrg } 247301e04c3fSmrg 247401e04c3fSmrg radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags); 2475af69d88dSmrg radeon_emit(cs, (resource_id_base + i) * 3); 2476af69d88dSmrg radeon_emit_array(cs, rstate->tex_sampler_words, 3); 2477af69d88dSmrg 2478af69d88dSmrg if (rstate->border_color_use) { 247901e04c3fSmrg radeon_set_config_reg_seq(cs, border_index_reg, 5); 2480af69d88dSmrg radeon_emit(cs, i); 248101e04c3fSmrg radeon_emit_array(cs, border_color_ptr->ui, 4); 2482af69d88dSmrg } 2483af69d88dSmrg } 2484af69d88dSmrg texinfo->states.dirty_mask = 0; 2485af69d88dSmrg} 2486af69d88dSmrg 2487af69d88dSmrgstatic void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2488af69d88dSmrg{ 248901e04c3fSmrg if (rctx->vs_shader->current->shader.vs_as_ls) { 249001e04c3fSmrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 72, 249101e04c3fSmrg R_00A450_TD_LS_SAMPLER0_BORDER_COLOR_INDEX, 0); 
249201e04c3fSmrg } else { 249301e04c3fSmrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, 249401e04c3fSmrg R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0); 249501e04c3fSmrg } 2496af69d88dSmrg} 2497af69d88dSmrg 2498af69d88dSmrgstatic void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2499af69d88dSmrg{ 250001e04c3fSmrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, 250101e04c3fSmrg R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0); 250201e04c3fSmrg} 250301e04c3fSmrg 250401e04c3fSmrgstatic void evergreen_emit_tcs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 250501e04c3fSmrg{ 250601e04c3fSmrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL], 54, 250701e04c3fSmrg R_00A43C_TD_HS_SAMPLER0_BORDER_COLOR_INDEX, 0); 250801e04c3fSmrg} 250901e04c3fSmrg 251001e04c3fSmrgstatic void evergreen_emit_tes_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 251101e04c3fSmrg{ 251201e04c3fSmrg if (!rctx->tes_shader) 251301e04c3fSmrg return; 251401e04c3fSmrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL], 18, 251501e04c3fSmrg R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0); 2516af69d88dSmrg} 2517af69d88dSmrg 2518af69d88dSmrgstatic void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2519af69d88dSmrg{ 252001e04c3fSmrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, 252101e04c3fSmrg R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0); 252201e04c3fSmrg} 252301e04c3fSmrg 252401e04c3fSmrgstatic void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 252501e04c3fSmrg{ 252601e04c3fSmrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90, 252701e04c3fSmrg R_00A464_TD_CS_SAMPLER0_BORDER_INDEX, 252801e04c3fSmrg RADEON_CP_PACKET3_COMPUTE_MODE); 2529af69d88dSmrg} 2530af69d88dSmrg 2531af69d88dSmrgstatic void 
evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) 2532af69d88dSmrg{ 2533af69d88dSmrg struct r600_sample_mask *s = (struct r600_sample_mask*)a; 2534af69d88dSmrg uint8_t mask = s->sample_mask; 2535af69d88dSmrg 25367ec681f3Smrg radeon_set_context_reg(&rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK, 2537af69d88dSmrg mask | (mask << 8) | (mask << 16) | (mask << 24)); 2538af69d88dSmrg} 2539af69d88dSmrg 2540af69d88dSmrgstatic void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) 2541af69d88dSmrg{ 2542af69d88dSmrg struct r600_sample_mask *s = (struct r600_sample_mask*)a; 25437ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2544af69d88dSmrg uint16_t mask = s->sample_mask; 2545af69d88dSmrg 254601e04c3fSmrg radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 2547af69d88dSmrg radeon_emit(cs, mask | (mask << 16)); /* X0Y0_X1Y0 */ 2548af69d88dSmrg radeon_emit(cs, mask | (mask << 16)); /* X0Y1_X1Y1 */ 2549af69d88dSmrg} 2550af69d88dSmrg 2551af69d88dSmrgstatic void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) 2552af69d88dSmrg{ 25537ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2554af69d88dSmrg struct r600_cso_state *state = (struct r600_cso_state*)a; 2555af69d88dSmrg struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; 2556af69d88dSmrg 255701e04c3fSmrg if (!shader) 255801e04c3fSmrg return; 255901e04c3fSmrg 256001e04c3fSmrg radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS, 2561af69d88dSmrg (shader->buffer->gpu_address + shader->offset) >> 8); 2562af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 256301e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer, 256401e04c3fSmrg RADEON_USAGE_READ, 256501e04c3fSmrg RADEON_PRIO_SHADER_BINARY)); 2566af69d88dSmrg} 2567af69d88dSmrg 2568af69d88dSmrgstatic void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) 2569af69d88dSmrg{ 
25707ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2571af69d88dSmrg struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; 2572af69d88dSmrg 257301e04c3fSmrg uint32_t v = 0, v2 = 0, primid = 0, tf_param = 0; 257401e04c3fSmrg 257501e04c3fSmrg if (rctx->vs_shader->current->shader.vs_as_gs_a) { 257601e04c3fSmrg v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_A); 257701e04c3fSmrg primid = 1; 257801e04c3fSmrg } 2579af69d88dSmrg 2580af69d88dSmrg if (state->geom_enable) { 2581af69d88dSmrg uint32_t cut_val; 2582af69d88dSmrg 258301e04c3fSmrg if (rctx->gs_shader->gs_max_out_vertices <= 128) 2584af69d88dSmrg cut_val = V_028A40_GS_CUT_128; 258501e04c3fSmrg else if (rctx->gs_shader->gs_max_out_vertices <= 256) 2586af69d88dSmrg cut_val = V_028A40_GS_CUT_256; 258701e04c3fSmrg else if (rctx->gs_shader->gs_max_out_vertices <= 512) 2588af69d88dSmrg cut_val = V_028A40_GS_CUT_512; 2589af69d88dSmrg else 2590af69d88dSmrg cut_val = V_028A40_GS_CUT_1024; 259101e04c3fSmrg 259201e04c3fSmrg v = S_028B54_GS_EN(1) | 259301e04c3fSmrg S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); 259401e04c3fSmrg if (!rctx->tes_shader) 259501e04c3fSmrg v |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL); 2596af69d88dSmrg 2597af69d88dSmrg v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) | 2598af69d88dSmrg S_028A40_CUT_MODE(cut_val); 2599af69d88dSmrg 2600af69d88dSmrg if (rctx->gs_shader->current->shader.gs_prim_id_input) 2601af69d88dSmrg primid = 1; 2602af69d88dSmrg } 2603af69d88dSmrg 260401e04c3fSmrg if (rctx->tes_shader) { 260501e04c3fSmrg uint32_t type, partitioning, topology; 260601e04c3fSmrg struct tgsi_shader_info *info = &rctx->tes_shader->current->selector->info; 260701e04c3fSmrg unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE]; 260801e04c3fSmrg unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; 260901e04c3fSmrg bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; 261001e04c3fSmrg bool tes_point_mode = 
info->properties[TGSI_PROPERTY_TES_POINT_MODE]; 261101e04c3fSmrg switch (tes_prim_mode) { 261201e04c3fSmrg case PIPE_PRIM_LINES: 261301e04c3fSmrg type = V_028B6C_TESS_ISOLINE; 261401e04c3fSmrg break; 261501e04c3fSmrg case PIPE_PRIM_TRIANGLES: 261601e04c3fSmrg type = V_028B6C_TESS_TRIANGLE; 261701e04c3fSmrg break; 261801e04c3fSmrg case PIPE_PRIM_QUADS: 261901e04c3fSmrg type = V_028B6C_TESS_QUAD; 262001e04c3fSmrg break; 262101e04c3fSmrg default: 262201e04c3fSmrg assert(0); 262301e04c3fSmrg return; 262401e04c3fSmrg } 262501e04c3fSmrg 262601e04c3fSmrg switch (tes_spacing) { 262701e04c3fSmrg case PIPE_TESS_SPACING_FRACTIONAL_ODD: 262801e04c3fSmrg partitioning = V_028B6C_PART_FRAC_ODD; 262901e04c3fSmrg break; 263001e04c3fSmrg case PIPE_TESS_SPACING_FRACTIONAL_EVEN: 263101e04c3fSmrg partitioning = V_028B6C_PART_FRAC_EVEN; 263201e04c3fSmrg break; 263301e04c3fSmrg case PIPE_TESS_SPACING_EQUAL: 263401e04c3fSmrg partitioning = V_028B6C_PART_INTEGER; 263501e04c3fSmrg break; 263601e04c3fSmrg default: 263701e04c3fSmrg assert(0); 263801e04c3fSmrg return; 263901e04c3fSmrg } 264001e04c3fSmrg 264101e04c3fSmrg if (tes_point_mode) 264201e04c3fSmrg topology = V_028B6C_OUTPUT_POINT; 264301e04c3fSmrg else if (tes_prim_mode == PIPE_PRIM_LINES) 264401e04c3fSmrg topology = V_028B6C_OUTPUT_LINE; 264501e04c3fSmrg else if (tes_vertex_order_cw) 264601e04c3fSmrg /* XXX follow radeonsi and invert */ 264701e04c3fSmrg topology = V_028B6C_OUTPUT_TRIANGLE_CCW; 264801e04c3fSmrg else 264901e04c3fSmrg topology = V_028B6C_OUTPUT_TRIANGLE_CW; 265001e04c3fSmrg 265101e04c3fSmrg tf_param = S_028B6C_TYPE(type) | 265201e04c3fSmrg S_028B6C_PARTITIONING(partitioning) | 265301e04c3fSmrg S_028B6C_TOPOLOGY(topology); 265401e04c3fSmrg } 265501e04c3fSmrg 265601e04c3fSmrg if (rctx->tes_shader) { 265701e04c3fSmrg v |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | 265801e04c3fSmrg S_028B54_HS_EN(1); 265901e04c3fSmrg if (!state->geom_enable) 266001e04c3fSmrg v |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); 266101e04c3fSmrg else 
266201e04c3fSmrg v |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS); 266301e04c3fSmrg } 266401e04c3fSmrg 266501e04c3fSmrg radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, v ? 1 : 0 ); 266601e04c3fSmrg radeon_set_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v); 266701e04c3fSmrg radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2); 266801e04c3fSmrg radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid); 266901e04c3fSmrg radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, tf_param); 2670af69d88dSmrg} 2671af69d88dSmrg 2672af69d88dSmrgstatic void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) 2673af69d88dSmrg{ 26747ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 2675af69d88dSmrg struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; 2676af69d88dSmrg struct r600_resource *rbuffer; 2677af69d88dSmrg 267801e04c3fSmrg radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); 2679af69d88dSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2680af69d88dSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); 2681af69d88dSmrg 2682af69d88dSmrg if (state->enable) { 2683af69d88dSmrg rbuffer =(struct r600_resource*)state->esgs_ring.buffer; 268401e04c3fSmrg radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 2685af69d88dSmrg rbuffer->gpu_address >> 8); 2686af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 268701e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 2688af69d88dSmrg RADEON_USAGE_READWRITE, 268901e04c3fSmrg RADEON_PRIO_SHADER_RINGS)); 269001e04c3fSmrg radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 2691af69d88dSmrg state->esgs_ring.buffer_size >> 8); 2692af69d88dSmrg 2693af69d88dSmrg rbuffer =(struct r600_resource*)state->gsvs_ring.buffer; 269401e04c3fSmrg radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 2695af69d88dSmrg rbuffer->gpu_address >> 8); 2696af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 269701e04c3fSmrg radeon_emit(cs, 
radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 2698af69d88dSmrg RADEON_USAGE_READWRITE, 269901e04c3fSmrg RADEON_PRIO_SHADER_RINGS)); 270001e04c3fSmrg radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 2701af69d88dSmrg state->gsvs_ring.buffer_size >> 8); 2702af69d88dSmrg } else { 270301e04c3fSmrg radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0); 270401e04c3fSmrg radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0); 2705af69d88dSmrg } 27063464ebd5Sriastradh 270701e04c3fSmrg radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); 2708af69d88dSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2709af69d88dSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); 2710af69d88dSmrg} 2711af69d88dSmrg 2712af69d88dSmrgvoid cayman_init_common_regs(struct r600_command_buffer *cb, 2713af69d88dSmrg enum chip_class ctx_chip_class, 2714af69d88dSmrg enum radeon_family ctx_family, 2715af69d88dSmrg int ctx_drm_minor) 2716af69d88dSmrg{ 2717af69d88dSmrg r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2); 2718af69d88dSmrg r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */ 27193464ebd5Sriastradh /* always set the temp clauses */ 2720af69d88dSmrg r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */ 2721af69d88dSmrg 2722af69d88dSmrg r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); 2723af69d88dSmrg r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ 2724af69d88dSmrg r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ 2725af69d88dSmrg 2726af69d88dSmrg r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8)); 2727af69d88dSmrg 2728af69d88dSmrg r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2); 2729af69d88dSmrg r600_store_value(cb, 0); 2730af69d88dSmrg r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf)); 2731af69d88dSmrg 2732af69d88dSmrg r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0); 
2733af69d88dSmrg} 2734af69d88dSmrg 2735af69d88dSmrgstatic void cayman_init_atom_start_cs(struct r600_context *rctx) 2736af69d88dSmrg{ 2737af69d88dSmrg struct r600_command_buffer *cb = &rctx->start_cs_cmd; 273801e04c3fSmrg int i; 2739af69d88dSmrg 274001e04c3fSmrg r600_init_command_buffer(cb, 338); 2741af69d88dSmrg 2742af69d88dSmrg /* This must be first. */ 2743af69d88dSmrg r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 2744af69d88dSmrg r600_store_value(cb, 0x80000000); 2745af69d88dSmrg r600_store_value(cb, 0x80000000); 2746af69d88dSmrg 2747af69d88dSmrg /* We're setting config registers here. */ 2748af69d88dSmrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2749af69d88dSmrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 2750af69d88dSmrg 275101e04c3fSmrg /* This enables pipeline stat & streamout queries. 275201e04c3fSmrg * They are only disabled by blits. 275301e04c3fSmrg */ 275401e04c3fSmrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 275501e04c3fSmrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0)); 275601e04c3fSmrg 2757af69d88dSmrg cayman_init_common_regs(cb, rctx->b.chip_class, 2758af69d88dSmrg rctx->b.family, rctx->screen->b.info.drm_minor); 2759af69d88dSmrg 2760af69d88dSmrg r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0); 2761af69d88dSmrg r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4)); 2762af69d88dSmrg 276301e04c3fSmrg /* remove LS/HS from one SIMD for hw workaround */ 276401e04c3fSmrg r600_store_config_reg_seq(cb, R_008E20_SQ_STATIC_THREAD_MGMT1, 3); 276501e04c3fSmrg r600_store_value(cb, 0xffffffff); 276601e04c3fSmrg r600_store_value(cb, 0xffffffff); 276701e04c3fSmrg r600_store_value(cb, 0xfffffffe); 276801e04c3fSmrg 2769af69d88dSmrg r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6); 2770af69d88dSmrg r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */ 2771af69d88dSmrg r600_store_value(cb, 0); /* 
R_028904_SQ_GSVS_RING_ITEMSIZE */ 2772af69d88dSmrg r600_store_value(cb, 0); /* R_028908_SQ_ESTMP_RING_ITEMSIZE */ 2773af69d88dSmrg r600_store_value(cb, 0); /* R_02890C_SQ_GSTMP_RING_ITEMSIZE */ 2774af69d88dSmrg r600_store_value(cb, 0); /* R_028910_SQ_VSTMP_RING_ITEMSIZE */ 2775af69d88dSmrg r600_store_value(cb, 0); /* R_028914_SQ_PSTMP_RING_ITEMSIZE */ 2776af69d88dSmrg 2777af69d88dSmrg r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4); 2778af69d88dSmrg r600_store_value(cb, 0); /* R_02891C_SQ_GS_VERT_ITEMSIZE */ 2779af69d88dSmrg r600_store_value(cb, 0); /* R_028920_SQ_GS_VERT_ITEMSIZE_1 */ 2780af69d88dSmrg r600_store_value(cb, 0); /* R_028924_SQ_GS_VERT_ITEMSIZE_2 */ 2781af69d88dSmrg r600_store_value(cb, 0); /* R_028928_SQ_GS_VERT_ITEMSIZE_3 */ 2782af69d88dSmrg 2783af69d88dSmrg r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); 2784af69d88dSmrg r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ 2785af69d88dSmrg r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */ 278601e04c3fSmrg r600_store_value(cb, fui(64)); /* R_028A18_VGT_HOS_MAX_TESS_LEVEL */ 278701e04c3fSmrg r600_store_value(cb, fui(0)); /* R_028A1C_VGT_HOS_MIN_TESS_LEVEL */ 278801e04c3fSmrg r600_store_value(cb, 16); /* R_028A20_VGT_HOS_REUSE_DEPTH */ 2789af69d88dSmrg r600_store_value(cb, 0); /* R_028A24_VGT_GROUP_PRIM_TYPE */ 2790af69d88dSmrg r600_store_value(cb, 0); /* R_028A28_VGT_GROUP_FIRST_DECR */ 2791af69d88dSmrg r600_store_value(cb, 0); /* R_028A2C_VGT_GROUP_DECR */ 2792af69d88dSmrg r600_store_value(cb, 0); /* R_028A30_VGT_GROUP_VECT_0_CNTL */ 2793af69d88dSmrg r600_store_value(cb, 0); /* R_028A34_VGT_GROUP_VECT_1_CNTL */ 2794af69d88dSmrg r600_store_value(cb, 0); /* R_028A38_VGT_GROUP_VECT_0_FMT_CNTL */ 2795af69d88dSmrg r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */ 2796af69d88dSmrg r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */ 2797af69d88dSmrg 2798af69d88dSmrg r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0); 
2799af69d88dSmrg 2800af69d88dSmrg r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1); 2801af69d88dSmrg 2802af69d88dSmrg r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 2803af69d88dSmrg r600_store_value(cb, 0x76543210); /* CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */ 2804af69d88dSmrg r600_store_value(cb, 0xfedcba98); /* CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */ 2805af69d88dSmrg 280601e04c3fSmrg r600_store_context_reg(cb, R_028724_GDS_ADDR_SIZE, 0x3fff); 280701e04c3fSmrg r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2); 280801e04c3fSmrg r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */ 2809af69d88dSmrg r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */ 2810af69d88dSmrg 2811af69d88dSmrg r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0); 2812af69d88dSmrg 2813af69d88dSmrg r600_store_context_reg_seq(cb, R_028400_VGT_MAX_VTX_INDX, 2); 2814af69d88dSmrg r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */ 2815af69d88dSmrg r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */ 2816af69d88dSmrg 2817af69d88dSmrg r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); 2818af69d88dSmrg 2819af69d88dSmrg r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0); 2820af69d88dSmrg 2821af69d88dSmrg r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); 2822af69d88dSmrg 2823af69d88dSmrg r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3); 2824af69d88dSmrg r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ 2825af69d88dSmrg r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ 2826af69d88dSmrg r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ 2827af69d88dSmrg 2828af69d88dSmrg r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); 2829af69d88dSmrg r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 2830af69d88dSmrg 2831af69d88dSmrg r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 2832af69d88dSmrg r600_store_context_reg(cb, 
R_028820_PA_CL_NANINF_CNTL, 0); 2833af69d88dSmrg 2834af69d88dSmrg r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2); 2835af69d88dSmrg r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */ 2836af69d88dSmrg r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */ 2837af69d88dSmrg 2838af69d88dSmrg r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2); 2839af69d88dSmrg r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */ 2840af69d88dSmrg r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */ 2841af69d88dSmrg 2842af69d88dSmrg r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 2843af69d88dSmrg r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 284401e04c3fSmrg r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 284501e04c3fSmrg r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 284601e04c3fSmrg r600_store_context_reg(cb, R_0288C0_SQ_PGM_RESOURCES_2_HS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 284701e04c3fSmrg r600_store_context_reg(cb, R_0288D8_SQ_PGM_RESOURCES_2_LS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 284801e04c3fSmrg 2849af69d88dSmrg r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0); 2850af69d88dSmrg 2851af69d88dSmrg /* to avoid GPU doing any preloading of constant from random address */ 2852af69d88dSmrg r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 16); 2853af69d88dSmrg for (i = 0; i < 16; i++) 2854af69d88dSmrg r600_store_value(cb, 0); 2855af69d88dSmrg 2856af69d88dSmrg r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 16); 2857af69d88dSmrg for (i = 0; i < 16; i++) 2858af69d88dSmrg r600_store_value(cb, 0); 2859af69d88dSmrg 
2860af69d88dSmrg r600_store_context_reg_seq(cb, R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 16); 2861af69d88dSmrg for (i = 0; i < 16; i++) 2862af69d88dSmrg r600_store_value(cb, 0); 2863af69d88dSmrg 2864af69d88dSmrg r600_store_context_reg_seq(cb, R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 16); 2865af69d88dSmrg for (i = 0; i < 16; i++) 2866af69d88dSmrg r600_store_value(cb, 0); 2867af69d88dSmrg 2868af69d88dSmrg r600_store_context_reg_seq(cb, R_028F80_ALU_CONST_BUFFER_SIZE_HS_0, 16); 2869af69d88dSmrg for (i = 0; i < 16; i++) 2870af69d88dSmrg r600_store_value(cb, 0); 2871af69d88dSmrg 2872af69d88dSmrg if (rctx->screen->b.has_streamout) { 2873af69d88dSmrg r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 2874af69d88dSmrg } 2875af69d88dSmrg 2876af69d88dSmrg r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0); 2877af69d88dSmrg r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 2878af69d88dSmrg r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); 2879af69d88dSmrg r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2); 2880af69d88dSmrg r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */ 2881af69d88dSmrg r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */ 2882af69d88dSmrg 288301e04c3fSmrg r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 2); 288401e04c3fSmrg r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */ 288501e04c3fSmrg r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */ 288601e04c3fSmrg r600_store_context_reg(cb, R_028B6C_VGT_TF_PARAM, 0); 2887af69d88dSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF); 2888af69d88dSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF); 2889af69d88dSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (64 * 4), 0x01000FFF); 289001e04c3fSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (96 * 4), 0x01000FFF); 289101e04c3fSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF); 
2892af69d88dSmrg} 2893af69d88dSmrg 289401e04c3fSmrgvoid evergreen_init_common_regs(struct r600_context *rctx, struct r600_command_buffer *cb, 289501e04c3fSmrg enum chip_class ctx_chip_class, 289601e04c3fSmrg enum radeon_family ctx_family, 289701e04c3fSmrg int ctx_drm_minor) 2898af69d88dSmrg{ 28993464ebd5Sriastradh int ps_prio; 29003464ebd5Sriastradh int vs_prio; 29013464ebd5Sriastradh int gs_prio; 29023464ebd5Sriastradh int es_prio; 2903af69d88dSmrg 2904af69d88dSmrg int hs_prio; 2905af69d88dSmrg int cs_prio; 2906af69d88dSmrg int ls_prio; 2907af69d88dSmrg 2908af69d88dSmrg unsigned tmp; 2909af69d88dSmrg 2910af69d88dSmrg ps_prio = 0; 2911af69d88dSmrg vs_prio = 1; 2912af69d88dSmrg gs_prio = 2; 2913af69d88dSmrg es_prio = 3; 291401e04c3fSmrg hs_prio = 3; 291501e04c3fSmrg ls_prio = 3; 2916af69d88dSmrg cs_prio = 0; 2917af69d88dSmrg 291801e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_PS] = 93; 291901e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_VS] = 46; 292001e04c3fSmrg rctx->r6xx_num_clause_temp_gprs = 4; 292101e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_GS] = 31; 292201e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_ES] = 31; 292301e04c3fSmrg rctx->default_gprs[EG_HW_STAGE_HS] = 23; 292401e04c3fSmrg rctx->default_gprs[EG_HW_STAGE_LS] = 23; 2925af69d88dSmrg 2926af69d88dSmrg tmp = 0; 2927af69d88dSmrg switch (ctx_family) { 2928af69d88dSmrg case CHIP_CEDAR: 2929af69d88dSmrg case CHIP_PALM: 2930af69d88dSmrg case CHIP_SUMO: 2931af69d88dSmrg case CHIP_SUMO2: 2932af69d88dSmrg case CHIP_CAICOS: 2933af69d88dSmrg break; 2934af69d88dSmrg default: 2935af69d88dSmrg tmp |= S_008C00_VC_ENABLE(1); 2936af69d88dSmrg break; 2937af69d88dSmrg } 2938af69d88dSmrg tmp |= S_008C00_EXPORT_SRC_C(1); 2939af69d88dSmrg tmp |= S_008C00_CS_PRIO(cs_prio); 2940af69d88dSmrg tmp |= S_008C00_LS_PRIO(ls_prio); 2941af69d88dSmrg tmp |= S_008C00_HS_PRIO(hs_prio); 2942af69d88dSmrg tmp |= S_008C00_PS_PRIO(ps_prio); 2943af69d88dSmrg tmp |= S_008C00_VS_PRIO(vs_prio); 2944af69d88dSmrg tmp |= S_008C00_GS_PRIO(gs_prio); 
2945af69d88dSmrg tmp |= S_008C00_ES_PRIO(es_prio); 2946af69d88dSmrg 294701e04c3fSmrg r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1); 294801e04c3fSmrg r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */ 2949af69d88dSmrg 295001e04c3fSmrg r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); 295101e04c3fSmrg r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ 295201e04c3fSmrg r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ 2953af69d88dSmrg 2954af69d88dSmrg /* The cs checker requires this register to be set. */ 2955af69d88dSmrg r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0); 2956af69d88dSmrg 2957af69d88dSmrg r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2); 2958af69d88dSmrg r600_store_value(cb, 0); 2959af69d88dSmrg r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf)); 2960af69d88dSmrg 2961af69d88dSmrg return; 2962af69d88dSmrg} 2963af69d88dSmrg 2964af69d88dSmrgvoid evergreen_init_atom_start_cs(struct r600_context *rctx) 2965af69d88dSmrg{ 2966af69d88dSmrg struct r600_command_buffer *cb = &rctx->start_cs_cmd; 29673464ebd5Sriastradh int num_ps_threads; 29683464ebd5Sriastradh int num_vs_threads; 29693464ebd5Sriastradh int num_gs_threads; 29703464ebd5Sriastradh int num_es_threads; 29713464ebd5Sriastradh int num_hs_threads; 29723464ebd5Sriastradh int num_ls_threads; 2973af69d88dSmrg 29743464ebd5Sriastradh int num_ps_stack_entries; 29753464ebd5Sriastradh int num_vs_stack_entries; 29763464ebd5Sriastradh int num_gs_stack_entries; 29773464ebd5Sriastradh int num_es_stack_entries; 29783464ebd5Sriastradh int num_hs_stack_entries; 29793464ebd5Sriastradh int num_ls_stack_entries; 29803464ebd5Sriastradh enum radeon_family family; 2981af69d88dSmrg unsigned tmp, i; 29823464ebd5Sriastradh 2983af69d88dSmrg if (rctx->b.chip_class == CAYMAN) { 2984af69d88dSmrg cayman_init_atom_start_cs(rctx); 29853464ebd5Sriastradh return; 29863464ebd5Sriastradh } 29873464ebd5Sriastradh 298801e04c3fSmrg 
r600_init_command_buffer(cb, 338); 2989af69d88dSmrg 2990af69d88dSmrg /* This must be first. */ 2991af69d88dSmrg r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 2992af69d88dSmrg r600_store_value(cb, 0x80000000); 2993af69d88dSmrg r600_store_value(cb, 0x80000000); 2994af69d88dSmrg 2995af69d88dSmrg /* We're setting config registers here. */ 2996af69d88dSmrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2997af69d88dSmrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 2998af69d88dSmrg 299901e04c3fSmrg /* This enables pipeline stat & streamout queries. 300001e04c3fSmrg * They are only disabled by blits. 300101e04c3fSmrg */ 300201e04c3fSmrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 300301e04c3fSmrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0)); 300401e04c3fSmrg 300501e04c3fSmrg evergreen_init_common_regs(rctx, cb, rctx->b.chip_class, 3006af69d88dSmrg rctx->b.family, rctx->screen->b.info.drm_minor); 3007af69d88dSmrg 3008af69d88dSmrg family = rctx->b.family; 30093464ebd5Sriastradh switch (family) { 30103464ebd5Sriastradh case CHIP_CEDAR: 30113464ebd5Sriastradh default: 30123464ebd5Sriastradh num_ps_threads = 96; 30133464ebd5Sriastradh num_vs_threads = 16; 30143464ebd5Sriastradh num_gs_threads = 16; 30153464ebd5Sriastradh num_es_threads = 16; 30163464ebd5Sriastradh num_hs_threads = 16; 30173464ebd5Sriastradh num_ls_threads = 16; 30183464ebd5Sriastradh num_ps_stack_entries = 42; 30193464ebd5Sriastradh num_vs_stack_entries = 42; 30203464ebd5Sriastradh num_gs_stack_entries = 42; 30213464ebd5Sriastradh num_es_stack_entries = 42; 30223464ebd5Sriastradh num_hs_stack_entries = 42; 30233464ebd5Sriastradh num_ls_stack_entries = 42; 30243464ebd5Sriastradh break; 30253464ebd5Sriastradh case CHIP_REDWOOD: 30263464ebd5Sriastradh num_ps_threads = 128; 30273464ebd5Sriastradh num_vs_threads = 20; 30283464ebd5Sriastradh num_gs_threads = 20; 30293464ebd5Sriastradh num_es_threads = 20; 
30303464ebd5Sriastradh num_hs_threads = 20; 30313464ebd5Sriastradh num_ls_threads = 20; 30323464ebd5Sriastradh num_ps_stack_entries = 42; 30333464ebd5Sriastradh num_vs_stack_entries = 42; 30343464ebd5Sriastradh num_gs_stack_entries = 42; 30353464ebd5Sriastradh num_es_stack_entries = 42; 30363464ebd5Sriastradh num_hs_stack_entries = 42; 30373464ebd5Sriastradh num_ls_stack_entries = 42; 30383464ebd5Sriastradh break; 30393464ebd5Sriastradh case CHIP_JUNIPER: 30403464ebd5Sriastradh num_ps_threads = 128; 30413464ebd5Sriastradh num_vs_threads = 20; 30423464ebd5Sriastradh num_gs_threads = 20; 30433464ebd5Sriastradh num_es_threads = 20; 30443464ebd5Sriastradh num_hs_threads = 20; 30453464ebd5Sriastradh num_ls_threads = 20; 30463464ebd5Sriastradh num_ps_stack_entries = 85; 30473464ebd5Sriastradh num_vs_stack_entries = 85; 30483464ebd5Sriastradh num_gs_stack_entries = 85; 30493464ebd5Sriastradh num_es_stack_entries = 85; 30503464ebd5Sriastradh num_hs_stack_entries = 85; 30513464ebd5Sriastradh num_ls_stack_entries = 85; 30523464ebd5Sriastradh break; 30533464ebd5Sriastradh case CHIP_CYPRESS: 30543464ebd5Sriastradh case CHIP_HEMLOCK: 30553464ebd5Sriastradh num_ps_threads = 128; 30563464ebd5Sriastradh num_vs_threads = 20; 30573464ebd5Sriastradh num_gs_threads = 20; 30583464ebd5Sriastradh num_es_threads = 20; 30593464ebd5Sriastradh num_hs_threads = 20; 30603464ebd5Sriastradh num_ls_threads = 20; 30613464ebd5Sriastradh num_ps_stack_entries = 85; 30623464ebd5Sriastradh num_vs_stack_entries = 85; 30633464ebd5Sriastradh num_gs_stack_entries = 85; 30643464ebd5Sriastradh num_es_stack_entries = 85; 30653464ebd5Sriastradh num_hs_stack_entries = 85; 30663464ebd5Sriastradh num_ls_stack_entries = 85; 30673464ebd5Sriastradh break; 30683464ebd5Sriastradh case CHIP_PALM: 30693464ebd5Sriastradh num_ps_threads = 96; 30703464ebd5Sriastradh num_vs_threads = 16; 30713464ebd5Sriastradh num_gs_threads = 16; 30723464ebd5Sriastradh num_es_threads = 16; 30733464ebd5Sriastradh num_hs_threads = 16; 
30743464ebd5Sriastradh num_ls_threads = 16; 30753464ebd5Sriastradh num_ps_stack_entries = 42; 30763464ebd5Sriastradh num_vs_stack_entries = 42; 30773464ebd5Sriastradh num_gs_stack_entries = 42; 30783464ebd5Sriastradh num_es_stack_entries = 42; 30793464ebd5Sriastradh num_hs_stack_entries = 42; 30803464ebd5Sriastradh num_ls_stack_entries = 42; 30813464ebd5Sriastradh break; 30823464ebd5Sriastradh case CHIP_SUMO: 30833464ebd5Sriastradh num_ps_threads = 96; 30843464ebd5Sriastradh num_vs_threads = 25; 30853464ebd5Sriastradh num_gs_threads = 25; 30863464ebd5Sriastradh num_es_threads = 25; 308701e04c3fSmrg num_hs_threads = 16; 308801e04c3fSmrg num_ls_threads = 16; 30893464ebd5Sriastradh num_ps_stack_entries = 42; 30903464ebd5Sriastradh num_vs_stack_entries = 42; 30913464ebd5Sriastradh num_gs_stack_entries = 42; 30923464ebd5Sriastradh num_es_stack_entries = 42; 30933464ebd5Sriastradh num_hs_stack_entries = 42; 30943464ebd5Sriastradh num_ls_stack_entries = 42; 30953464ebd5Sriastradh break; 30963464ebd5Sriastradh case CHIP_SUMO2: 30973464ebd5Sriastradh num_ps_threads = 96; 30983464ebd5Sriastradh num_vs_threads = 25; 30993464ebd5Sriastradh num_gs_threads = 25; 31003464ebd5Sriastradh num_es_threads = 25; 310101e04c3fSmrg num_hs_threads = 16; 310201e04c3fSmrg num_ls_threads = 16; 31033464ebd5Sriastradh num_ps_stack_entries = 85; 31043464ebd5Sriastradh num_vs_stack_entries = 85; 31053464ebd5Sriastradh num_gs_stack_entries = 85; 31063464ebd5Sriastradh num_es_stack_entries = 85; 31073464ebd5Sriastradh num_hs_stack_entries = 85; 31083464ebd5Sriastradh num_ls_stack_entries = 85; 31093464ebd5Sriastradh break; 31103464ebd5Sriastradh case CHIP_BARTS: 31113464ebd5Sriastradh num_ps_threads = 128; 31123464ebd5Sriastradh num_vs_threads = 20; 31133464ebd5Sriastradh num_gs_threads = 20; 31143464ebd5Sriastradh num_es_threads = 20; 31153464ebd5Sriastradh num_hs_threads = 20; 31163464ebd5Sriastradh num_ls_threads = 20; 31173464ebd5Sriastradh num_ps_stack_entries = 85; 31183464ebd5Sriastradh 
num_vs_stack_entries = 85; 31193464ebd5Sriastradh num_gs_stack_entries = 85; 31203464ebd5Sriastradh num_es_stack_entries = 85; 31213464ebd5Sriastradh num_hs_stack_entries = 85; 31223464ebd5Sriastradh num_ls_stack_entries = 85; 31233464ebd5Sriastradh break; 31243464ebd5Sriastradh case CHIP_TURKS: 31253464ebd5Sriastradh num_ps_threads = 128; 31263464ebd5Sriastradh num_vs_threads = 20; 31273464ebd5Sriastradh num_gs_threads = 20; 31283464ebd5Sriastradh num_es_threads = 20; 31293464ebd5Sriastradh num_hs_threads = 20; 31303464ebd5Sriastradh num_ls_threads = 20; 31313464ebd5Sriastradh num_ps_stack_entries = 42; 31323464ebd5Sriastradh num_vs_stack_entries = 42; 31333464ebd5Sriastradh num_gs_stack_entries = 42; 31343464ebd5Sriastradh num_es_stack_entries = 42; 31353464ebd5Sriastradh num_hs_stack_entries = 42; 31363464ebd5Sriastradh num_ls_stack_entries = 42; 31373464ebd5Sriastradh break; 31383464ebd5Sriastradh case CHIP_CAICOS: 313901e04c3fSmrg num_ps_threads = 96; 31403464ebd5Sriastradh num_vs_threads = 10; 31413464ebd5Sriastradh num_gs_threads = 10; 31423464ebd5Sriastradh num_es_threads = 10; 31433464ebd5Sriastradh num_hs_threads = 10; 31443464ebd5Sriastradh num_ls_threads = 10; 31453464ebd5Sriastradh num_ps_stack_entries = 42; 31463464ebd5Sriastradh num_vs_stack_entries = 42; 31473464ebd5Sriastradh num_gs_stack_entries = 42; 31483464ebd5Sriastradh num_es_stack_entries = 42; 31493464ebd5Sriastradh num_hs_stack_entries = 42; 31503464ebd5Sriastradh num_ls_stack_entries = 42; 31513464ebd5Sriastradh break; 31523464ebd5Sriastradh } 31533464ebd5Sriastradh 3154af69d88dSmrg tmp = S_008C18_NUM_PS_THREADS(num_ps_threads); 31553464ebd5Sriastradh tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); 31563464ebd5Sriastradh tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); 31573464ebd5Sriastradh tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); 31583464ebd5Sriastradh 3159af69d88dSmrg r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5); 3160af69d88dSmrg r600_store_value(cb, tmp); 
/* R_008C18_SQ_THREAD_RESOURCE_MGMT_1 */ 3161af69d88dSmrg 3162af69d88dSmrg tmp = S_008C1C_NUM_HS_THREADS(num_hs_threads); 31633464ebd5Sriastradh tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); 3164af69d88dSmrg r600_store_value(cb, tmp); /* R_008C1C_SQ_THREAD_RESOURCE_MGMT_2 */ 31653464ebd5Sriastradh 3166af69d88dSmrg tmp = S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); 31673464ebd5Sriastradh tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); 3168af69d88dSmrg r600_store_value(cb, tmp); /* R_008C20_SQ_STACK_RESOURCE_MGMT_1 */ 31693464ebd5Sriastradh 3170af69d88dSmrg tmp = S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); 31713464ebd5Sriastradh tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); 3172af69d88dSmrg r600_store_value(cb, tmp); /* R_008C24_SQ_STACK_RESOURCE_MGMT_2 */ 31733464ebd5Sriastradh 3174af69d88dSmrg tmp = S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); 31753464ebd5Sriastradh tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries); 3176af69d88dSmrg r600_store_value(cb, tmp); /* R_008C28_SQ_STACK_RESOURCE_MGMT_3 */ 3177af69d88dSmrg 3178af69d88dSmrg r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT, 3179af69d88dSmrg S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000)); 3180af69d88dSmrg 318101e04c3fSmrg /* remove LS/HS from one SIMD for hw workaround */ 318201e04c3fSmrg r600_store_config_reg_seq(cb, R_008E20_SQ_STATIC_THREAD_MGMT1, 3); 318301e04c3fSmrg r600_store_value(cb, 0xffffffff); 318401e04c3fSmrg r600_store_value(cb, 0xffffffff); 318501e04c3fSmrg r600_store_value(cb, 0xfffffffe); 318601e04c3fSmrg 3187af69d88dSmrg r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0); 3188af69d88dSmrg r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4)); 3189af69d88dSmrg 3190af69d88dSmrg r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6); 3191af69d88dSmrg r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */ 3192af69d88dSmrg r600_store_value(cb, 0); /* 
R_028904_SQ_GSVS_RING_ITEMSIZE */ 3193af69d88dSmrg r600_store_value(cb, 0); /* R_028908_SQ_ESTMP_RING_ITEMSIZE */ 3194af69d88dSmrg r600_store_value(cb, 0); /* R_02890C_SQ_GSTMP_RING_ITEMSIZE */ 3195af69d88dSmrg r600_store_value(cb, 0); /* R_028910_SQ_VSTMP_RING_ITEMSIZE */ 3196af69d88dSmrg r600_store_value(cb, 0); /* R_028914_SQ_PSTMP_RING_ITEMSIZE */ 3197af69d88dSmrg 3198af69d88dSmrg r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4); 3199af69d88dSmrg r600_store_value(cb, 0); /* R_02891C_SQ_GS_VERT_ITEMSIZE */ 3200af69d88dSmrg r600_store_value(cb, 0); /* R_028920_SQ_GS_VERT_ITEMSIZE_1 */ 3201af69d88dSmrg r600_store_value(cb, 0); /* R_028924_SQ_GS_VERT_ITEMSIZE_2 */ 3202af69d88dSmrg r600_store_value(cb, 0); /* R_028928_SQ_GS_VERT_ITEMSIZE_3 */ 3203af69d88dSmrg 3204af69d88dSmrg r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); 3205af69d88dSmrg r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ 3206af69d88dSmrg r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */ 320701e04c3fSmrg r600_store_value(cb, fui(64)); /* R_028A18_VGT_HOS_MAX_TESS_LEVEL */ 320801e04c3fSmrg r600_store_value(cb, fui(1.0)); /* R_028A1C_VGT_HOS_MIN_TESS_LEVEL */ 320901e04c3fSmrg r600_store_value(cb, 16); /* R_028A20_VGT_HOS_REUSE_DEPTH */ 3210af69d88dSmrg r600_store_value(cb, 0); /* R_028A24_VGT_GROUP_PRIM_TYPE */ 3211af69d88dSmrg r600_store_value(cb, 0); /* R_028A28_VGT_GROUP_FIRST_DECR */ 3212af69d88dSmrg r600_store_value(cb, 0); /* R_028A2C_VGT_GROUP_DECR */ 3213af69d88dSmrg r600_store_value(cb, 0); /* R_028A30_VGT_GROUP_VECT_0_CNTL */ 3214af69d88dSmrg r600_store_value(cb, 0); /* R_028A34_VGT_GROUP_VECT_1_CNTL */ 3215af69d88dSmrg r600_store_value(cb, 0); /* R_028A38_VGT_GROUP_VECT_0_FMT_CNTL */ 3216af69d88dSmrg r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */ 3217af69d88dSmrg r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */ 3218af69d88dSmrg 3219af69d88dSmrg r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1); 
3220af69d88dSmrg 3221af69d88dSmrg r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0); 3222af69d88dSmrg 3223af69d88dSmrg r600_store_context_reg_seq(cb, R_028400_VGT_MAX_VTX_INDX, 2); 3224af69d88dSmrg r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */ 3225af69d88dSmrg r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */ 3226af69d88dSmrg 3227af69d88dSmrg r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); 3228af69d88dSmrg 3229af69d88dSmrg r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0); 3230af69d88dSmrg 3231af69d88dSmrg r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); 3232af69d88dSmrg r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3233af69d88dSmrg r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 3234af69d88dSmrg 3235af69d88dSmrg r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); 3236af69d88dSmrg r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0); 3237af69d88dSmrg 3238af69d88dSmrg r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3); 3239af69d88dSmrg r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ 3240af69d88dSmrg r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ 3241af69d88dSmrg r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ 3242af69d88dSmrg 3243af69d88dSmrg r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2); 3244af69d88dSmrg r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */ 3245af69d88dSmrg r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */ 3246af69d88dSmrg 3247af69d88dSmrg r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2); 3248af69d88dSmrg r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */ 3249af69d88dSmrg r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */ 3250af69d88dSmrg 3251af69d88dSmrg r600_store_context_reg(cb, 
R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3252af69d88dSmrg r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 325301e04c3fSmrg r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 325401e04c3fSmrg r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3255af69d88dSmrg r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0); 325601e04c3fSmrg r600_store_context_reg(cb, R_0288C0_SQ_PGM_RESOURCES_2_HS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 325701e04c3fSmrg r600_store_context_reg(cb, R_0288D8_SQ_PGM_RESOURCES_2_LS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3258af69d88dSmrg 3259af69d88dSmrg /* to avoid GPU doing any preloading of constant from random address */ 3260af69d88dSmrg r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 16); 3261af69d88dSmrg for (i = 0; i < 16; i++) 3262af69d88dSmrg r600_store_value(cb, 0); 3263af69d88dSmrg 3264af69d88dSmrg r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 16); 3265af69d88dSmrg for (i = 0; i < 16; i++) 3266af69d88dSmrg r600_store_value(cb, 0); 3267af69d88dSmrg 3268af69d88dSmrg r600_store_context_reg_seq(cb, R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 16); 3269af69d88dSmrg for (i = 0; i < 16; i++) 3270af69d88dSmrg r600_store_value(cb, 0); 3271af69d88dSmrg 3272af69d88dSmrg r600_store_context_reg_seq(cb, R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 16); 3273af69d88dSmrg for (i = 0; i < 16; i++) 3274af69d88dSmrg r600_store_value(cb, 0); 3275af69d88dSmrg 3276af69d88dSmrg r600_store_context_reg_seq(cb, R_028F80_ALU_CONST_BUFFER_SIZE_HS_0, 16); 3277af69d88dSmrg for (i = 0; i < 16; i++) 3278af69d88dSmrg r600_store_value(cb, 0); 3279af69d88dSmrg 3280af69d88dSmrg r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0); 3281af69d88dSmrg 3282af69d88dSmrg if (rctx->screen->b.has_streamout) { 
3283af69d88dSmrg r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3284af69d88dSmrg } 3285af69d88dSmrg 3286af69d88dSmrg r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0); 3287af69d88dSmrg r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 3288af69d88dSmrg r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); 3289af69d88dSmrg r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2); 3290af69d88dSmrg r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */ 3291af69d88dSmrg r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */ 329201e04c3fSmrg 329301e04c3fSmrg r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2); 329401e04c3fSmrg r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */ 329501e04c3fSmrg r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */ 329601e04c3fSmrg 329701e04c3fSmrg if (rctx->b.family == CHIP_CAICOS) { 329801e04c3fSmrg r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 2); 329901e04c3fSmrg r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */ 330001e04c3fSmrg r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */ 330101e04c3fSmrg r600_store_context_reg(cb, R_028B6C_VGT_TF_PARAM, 0); 330201e04c3fSmrg } else { 330301e04c3fSmrg r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 7); 330401e04c3fSmrg r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */ 330501e04c3fSmrg r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */ 330601e04c3fSmrg r600_store_value(cb, 0); /* R028B5C_VGT_LS_SIZE */ 330701e04c3fSmrg r600_store_value(cb, 0); /* R028B60_VGT_HS_SIZE */ 330801e04c3fSmrg r600_store_value(cb, 0); /* R028B64_VGT_LS_HS_ALLOC */ 330901e04c3fSmrg r600_store_value(cb, 0); /* R028B68_VGT_HS_PATCH_CONST */ 331001e04c3fSmrg r600_store_value(cb, 0); /* R028B68_VGT_TF_PARAM */ 331101e04c3fSmrg } 3312af69d88dSmrg 3313af69d88dSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF); 3314af69d88dSmrg eg_store_loop_const(cb, 
R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF); 3315af69d88dSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (64 * 4), 0x01000FFF); 331601e04c3fSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (96 * 4), 0x01000FFF); 331701e04c3fSmrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF); 3318af69d88dSmrg} 3319af69d88dSmrg 3320af69d88dSmrgvoid evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 3321af69d88dSmrg{ 3322af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 3323af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 33243464ebd5Sriastradh struct r600_shader *rshader = &shader->shader; 3325af69d88dSmrg unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0; 332601e04c3fSmrg int pos_index = -1, face_index = -1, fixed_pt_position_index = -1; 33273464ebd5Sriastradh int ninterp = 0; 332801e04c3fSmrg boolean have_perspective = FALSE, have_linear = FALSE; 332901e04c3fSmrg static const unsigned spi_baryc_enable_bit[6] = { 333001e04c3fSmrg S_0286E0_PERSP_SAMPLE_ENA(1), 333101e04c3fSmrg S_0286E0_PERSP_CENTER_ENA(1), 333201e04c3fSmrg S_0286E0_PERSP_CENTROID_ENA(1), 333301e04c3fSmrg S_0286E0_LINEAR_SAMPLE_ENA(1), 333401e04c3fSmrg S_0286E0_LINEAR_CENTER_ENA(1), 333501e04c3fSmrg S_0286E0_LINEAR_CENTROID_ENA(1) 333601e04c3fSmrg }; 333701e04c3fSmrg unsigned spi_baryc_cntl = 0, sid, tmp, num = 0; 333801e04c3fSmrg unsigned z_export = 0, stencil_export = 0, mask_export = 0; 3339af69d88dSmrg unsigned sprite_coord_enable = rctx->rasterizer ? 
rctx->rasterizer->sprite_coord_enable : 0; 3340af69d88dSmrg uint32_t spi_ps_input_cntl[32]; 33413464ebd5Sriastradh 3342af69d88dSmrg if (!cb->buf) { 3343af69d88dSmrg r600_init_command_buffer(cb, 64); 3344af69d88dSmrg } else { 3345af69d88dSmrg cb->num_dw = 0; 3346af69d88dSmrg } 33473464ebd5Sriastradh 33483464ebd5Sriastradh for (i = 0; i < rshader->ninput; i++) { 33493464ebd5Sriastradh /* evergreen NUM_INTERP only contains values interpolated into the LDS, 33503464ebd5Sriastradh POSITION goes via GPRs from the SC so isn't counted */ 33513464ebd5Sriastradh if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 33523464ebd5Sriastradh pos_index = i; 3353af69d88dSmrg else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) { 3354af69d88dSmrg if (face_index == -1) 3355af69d88dSmrg face_index = i; 3356af69d88dSmrg } 3357af69d88dSmrg else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 3358af69d88dSmrg if (face_index == -1) 3359af69d88dSmrg face_index = i; /* lives in same register, same enable bit */ 3360af69d88dSmrg } 336101e04c3fSmrg else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) { 336201e04c3fSmrg fixed_pt_position_index = i; 336301e04c3fSmrg } 33643464ebd5Sriastradh else { 3365af69d88dSmrg ninterp++; 336601e04c3fSmrg int k = eg_get_interpolator_index( 336701e04c3fSmrg rshader->input[i].interpolate, 336801e04c3fSmrg rshader->input[i].interpolate_location); 336901e04c3fSmrg if (k >= 0) { 337001e04c3fSmrg spi_baryc_cntl |= spi_baryc_enable_bit[k]; 337101e04c3fSmrg have_perspective |= k < 3; 337201e04c3fSmrg have_linear |= !(k < 3); 33737ec681f3Smrg if (rshader->input[i].uses_interpolate_at_centroid) { 33747ec681f3Smrg k = eg_get_interpolator_index( 33757ec681f3Smrg rshader->input[i].interpolate, 33767ec681f3Smrg TGSI_INTERPOLATE_LOC_CENTROID); 33777ec681f3Smrg spi_baryc_cntl |= spi_baryc_enable_bit[k]; 33787ec681f3Smrg } 337901e04c3fSmrg } 33803464ebd5Sriastradh } 3381af69d88dSmrg 3382af69d88dSmrg sid = rshader->input[i].spi_sid; 3383af69d88dSmrg 
3384af69d88dSmrg if (sid) { 3385af69d88dSmrg tmp = S_028644_SEMANTIC(sid); 3386af69d88dSmrg 338701e04c3fSmrg /* D3D 9 behaviour. GL is undefined */ 338801e04c3fSmrg if (rshader->input[i].name == TGSI_SEMANTIC_COLOR && rshader->input[i].sid == 0) 338901e04c3fSmrg tmp |= S_028644_DEFAULT_VAL(3); 339001e04c3fSmrg 3391af69d88dSmrg if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || 3392af69d88dSmrg rshader->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || 3393af69d88dSmrg (rshader->input[i].interpolate == TGSI_INTERPOLATE_COLOR && 3394af69d88dSmrg rctx->rasterizer && rctx->rasterizer->flatshade)) { 3395af69d88dSmrg tmp |= S_028644_FLAT_SHADE(1); 3396af69d88dSmrg } 3397af69d88dSmrg 33987ec681f3Smrg if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD || 33997ec681f3Smrg (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD && 34007ec681f3Smrg (sprite_coord_enable & (1 << rshader->input[i].sid)))) { 3401af69d88dSmrg tmp |= S_028644_PT_SPRITE_TEX(1); 3402af69d88dSmrg } 3403af69d88dSmrg 3404af69d88dSmrg spi_ps_input_cntl[num++] = tmp; 3405af69d88dSmrg } 34063464ebd5Sriastradh } 3407af69d88dSmrg 3408af69d88dSmrg r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, num); 3409af69d88dSmrg r600_store_array(cb, num, spi_ps_input_cntl); 3410af69d88dSmrg 34113464ebd5Sriastradh for (i = 0; i < rshader->noutput; i++) { 34123464ebd5Sriastradh if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 3413af69d88dSmrg z_export = 1; 34143464ebd5Sriastradh if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 3415af69d88dSmrg stencil_export = 1; 341601e04c3fSmrg if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK && 341701e04c3fSmrg rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) 341801e04c3fSmrg mask_export = 1; 34193464ebd5Sriastradh } 34203464ebd5Sriastradh if (rshader->uses_kill) 34213464ebd5Sriastradh db_shader_control |= S_02880C_KILL_ENABLE(1); 34223464ebd5Sriastradh 3423af69d88dSmrg db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export); 
3424af69d88dSmrg db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export); 342501e04c3fSmrg db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export); 342601e04c3fSmrg 342701e04c3fSmrg if (shader->selector->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) { 342801e04c3fSmrg db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) | 342901e04c3fSmrg S_02880C_EXEC_ON_NOOP(shader->selector->info.writes_memory); 343001e04c3fSmrg } else if (shader->selector->info.writes_memory) { 343101e04c3fSmrg db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1); 343201e04c3fSmrg } 343301e04c3fSmrg 343401e04c3fSmrg switch (rshader->ps_conservative_z) { 343501e04c3fSmrg default: /* fall through */ 343601e04c3fSmrg case TGSI_FS_DEPTH_LAYOUT_ANY: 343701e04c3fSmrg db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z); 343801e04c3fSmrg break; 343901e04c3fSmrg case TGSI_FS_DEPTH_LAYOUT_GREATER: 344001e04c3fSmrg db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); 344101e04c3fSmrg break; 344201e04c3fSmrg case TGSI_FS_DEPTH_LAYOUT_LESS: 344301e04c3fSmrg db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); 344401e04c3fSmrg break; 344501e04c3fSmrg } 3446af69d88dSmrg 34473464ebd5Sriastradh exports_ps = 0; 34483464ebd5Sriastradh for (i = 0; i < rshader->noutput; i++) { 34493464ebd5Sriastradh if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || 345001e04c3fSmrg rshader->output[i].name == TGSI_SEMANTIC_STENCIL || 345101e04c3fSmrg rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) 34523464ebd5Sriastradh exports_ps |= 1; 34533464ebd5Sriastradh } 3454af69d88dSmrg 345501e04c3fSmrg num_cout = rshader->ps_export_highest + 1; 3456af69d88dSmrg 34573464ebd5Sriastradh exports_ps |= S_02884C_EXPORT_COLORS(num_cout); 34583464ebd5Sriastradh if (!exports_ps) { 34593464ebd5Sriastradh /* always at least export 1 component per pixel */ 34603464ebd5Sriastradh exports_ps = 2; 34613464ebd5Sriastradh } 
3462af69d88dSmrg shader->nr_ps_color_outputs = num_cout; 346301e04c3fSmrg shader->ps_color_export_mask = rshader->ps_color_export_mask; 34643464ebd5Sriastradh if (ninterp == 0) { 34653464ebd5Sriastradh ninterp = 1; 34663464ebd5Sriastradh have_perspective = TRUE; 34673464ebd5Sriastradh } 346801e04c3fSmrg if (!spi_baryc_cntl) 346901e04c3fSmrg spi_baryc_cntl |= spi_baryc_enable_bit[0]; 34703464ebd5Sriastradh 3471af69d88dSmrg if (!have_perspective && !have_linear) 3472af69d88dSmrg have_perspective = TRUE; 3473af69d88dSmrg 34743464ebd5Sriastradh spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | 34753464ebd5Sriastradh S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | 34763464ebd5Sriastradh S_0286CC_LINEAR_GRADIENT_ENA(have_linear); 34773464ebd5Sriastradh spi_input_z = 0; 34783464ebd5Sriastradh if (pos_index != -1) { 34793464ebd5Sriastradh spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | 348001e04c3fSmrg S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) | 34813464ebd5Sriastradh S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); 3482af69d88dSmrg spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1); 34833464ebd5Sriastradh } 34843464ebd5Sriastradh 34853464ebd5Sriastradh spi_ps_in_control_1 = 0; 34863464ebd5Sriastradh if (face_index != -1) { 34873464ebd5Sriastradh spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 34883464ebd5Sriastradh S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 34893464ebd5Sriastradh } 349001e04c3fSmrg if (fixed_pt_position_index != -1) { 349101e04c3fSmrg spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) | 349201e04c3fSmrg S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr); 349301e04c3fSmrg } 34943464ebd5Sriastradh 3495af69d88dSmrg r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2); 3496af69d88dSmrg r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */ 3497af69d88dSmrg r600_store_value(cb, spi_ps_in_control_1); /* 
R_0286D0_SPI_PS_IN_CONTROL_1 */ 3498af69d88dSmrg 3499af69d88dSmrg r600_store_context_reg(cb, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); 3500af69d88dSmrg r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z); 3501af69d88dSmrg r600_store_context_reg(cb, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps); 3502af69d88dSmrg 3503af69d88dSmrg r600_store_context_reg_seq(cb, R_028840_SQ_PGM_START_PS, 2); 3504af69d88dSmrg r600_store_value(cb, shader->bo->gpu_address >> 8); 3505af69d88dSmrg r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */ 3506af69d88dSmrg S_028844_NUM_GPRS(rshader->bc.ngpr) | 3507af69d88dSmrg S_028844_PRIME_CACHE_ON_DRAW(1) | 350801e04c3fSmrg S_028844_DX10_CLAMP(1) | 3509af69d88dSmrg S_028844_STACK_SIZE(rshader->bc.nstack)); 3510af69d88dSmrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ 3511af69d88dSmrg 3512af69d88dSmrg shader->db_shader_control = db_shader_control; 351301e04c3fSmrg shader->ps_depth_export = z_export | stencil_export | mask_export; 3514af69d88dSmrg 3515af69d88dSmrg shader->sprite_coord_enable = sprite_coord_enable; 3516af69d88dSmrg if (rctx->rasterizer) 3517af69d88dSmrg shader->flatshade = rctx->rasterizer->flatshade; 3518af69d88dSmrg} 3519af69d88dSmrg 3520af69d88dSmrgvoid evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 3521af69d88dSmrg{ 3522af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 35233464ebd5Sriastradh struct r600_shader *rshader = &shader->shader; 35243464ebd5Sriastradh 3525af69d88dSmrg r600_init_command_buffer(cb, 32); 35263464ebd5Sriastradh 3527af69d88dSmrg r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES, 3528af69d88dSmrg S_028890_NUM_GPRS(rshader->bc.ngpr) | 352901e04c3fSmrg S_028890_DX10_CLAMP(1) | 3530af69d88dSmrg S_028890_STACK_SIZE(rshader->bc.nstack)); 3531af69d88dSmrg r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES, 3532af69d88dSmrg shader->bo->gpu_address >> 8); 3533af69d88dSmrg /* After that, the NOP 
relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ 3534af69d88dSmrg} 3535af69d88dSmrg 3536af69d88dSmrgvoid evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 3537af69d88dSmrg{ 3538af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 3539af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 3540af69d88dSmrg struct r600_shader *rshader = &shader->shader; 3541af69d88dSmrg struct r600_shader *cp_shader = &shader->gs_copy_shader->shader; 354201e04c3fSmrg unsigned gsvs_itemsizes[4] = { 354301e04c3fSmrg (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2, 354401e04c3fSmrg (cp_shader->ring_item_sizes[1] * shader->selector->gs_max_out_vertices) >> 2, 354501e04c3fSmrg (cp_shader->ring_item_sizes[2] * shader->selector->gs_max_out_vertices) >> 2, 354601e04c3fSmrg (cp_shader->ring_item_sizes[3] * shader->selector->gs_max_out_vertices) >> 2 354701e04c3fSmrg }; 3548af69d88dSmrg 3549af69d88dSmrg r600_init_command_buffer(cb, 64); 3550af69d88dSmrg 3551af69d88dSmrg /* VGT_GS_MODE is written by evergreen_emit_shader_stages */ 3552af69d88dSmrg 3553af69d88dSmrg 3554af69d88dSmrg r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT, 355501e04c3fSmrg S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices)); 3556af69d88dSmrg r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE, 355701e04c3fSmrg r600_conv_prim_to_gs_out(shader->selector->gs_output_prim)); 3558af69d88dSmrg 3559af69d88dSmrg if (rctx->screen->b.info.drm_minor >= 35) { 3560af69d88dSmrg r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT, 356101e04c3fSmrg S_028B90_CNT(MIN2(shader->selector->gs_num_invocations, 127)) | 356201e04c3fSmrg S_028B90_ENABLE(shader->selector->gs_num_invocations > 0)); 35633464ebd5Sriastradh } 3564af69d88dSmrg r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4); 356501e04c3fSmrg r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2); 356601e04c3fSmrg 
r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2); 356701e04c3fSmrg r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2); 356801e04c3fSmrg r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2); 3569af69d88dSmrg 3570af69d88dSmrg r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 357101e04c3fSmrg (rshader->ring_item_sizes[0]) >> 2); 3572af69d88dSmrg 3573af69d88dSmrg r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE, 357401e04c3fSmrg gsvs_itemsizes[0] + 357501e04c3fSmrg gsvs_itemsizes[1] + 357601e04c3fSmrg gsvs_itemsizes[2] + 357701e04c3fSmrg gsvs_itemsizes[3]); 3578af69d88dSmrg 3579af69d88dSmrg r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3); 358001e04c3fSmrg r600_store_value(cb, gsvs_itemsizes[0]); 358101e04c3fSmrg r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]); 358201e04c3fSmrg r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + gsvs_itemsizes[2]); 3583af69d88dSmrg 3584af69d88dSmrg /* FIXME calculate these values somehow ??? */ 3585af69d88dSmrg r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3); 3586af69d88dSmrg r600_store_value(cb, 0x80); /* GS_PER_ES */ 3587af69d88dSmrg r600_store_value(cb, 0x100); /* ES_PER_GS */ 3588af69d88dSmrg r600_store_value(cb, 0x2); /* GS_PER_VS */ 3589af69d88dSmrg 3590af69d88dSmrg r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS, 3591af69d88dSmrg S_028878_NUM_GPRS(rshader->bc.ngpr) | 359201e04c3fSmrg S_028878_DX10_CLAMP(1) | 3593af69d88dSmrg S_028878_STACK_SIZE(rshader->bc.nstack)); 3594af69d88dSmrg r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS, 3595af69d88dSmrg shader->bo->gpu_address >> 8); 3596af69d88dSmrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). 
*/ 3597af69d88dSmrg} 3598af69d88dSmrg 3599af69d88dSmrg 3600af69d88dSmrgvoid evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 3601af69d88dSmrg{ 3602af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 3603af69d88dSmrg struct r600_shader *rshader = &shader->shader; 3604af69d88dSmrg unsigned spi_vs_out_id[10] = {}; 3605af69d88dSmrg unsigned i, tmp, nparams = 0; 3606af69d88dSmrg 3607af69d88dSmrg for (i = 0; i < rshader->noutput; i++) { 3608af69d88dSmrg if (rshader->output[i].spi_sid) { 3609af69d88dSmrg tmp = rshader->output[i].spi_sid << ((nparams & 3) * 8); 3610af69d88dSmrg spi_vs_out_id[nparams / 4] |= tmp; 3611af69d88dSmrg nparams++; 3612af69d88dSmrg } 36133464ebd5Sriastradh } 3614af69d88dSmrg 3615af69d88dSmrg r600_init_command_buffer(cb, 32); 3616af69d88dSmrg 3617af69d88dSmrg r600_store_context_reg_seq(cb, R_02861C_SPI_VS_OUT_ID_0, 10); 36183464ebd5Sriastradh for (i = 0; i < 10; i++) { 3619af69d88dSmrg r600_store_value(cb, spi_vs_out_id[i]); 36203464ebd5Sriastradh } 36213464ebd5Sriastradh 36223464ebd5Sriastradh /* Certain attributes (position, psize, etc.) don't count as params. 36233464ebd5Sriastradh * VS is required to export at least one param and r600_shader_from_tgsi() 36243464ebd5Sriastradh * takes care of adding a dummy export. 
36253464ebd5Sriastradh */ 36263464ebd5Sriastradh if (nparams < 1) 36273464ebd5Sriastradh nparams = 1; 36283464ebd5Sriastradh 3629af69d88dSmrg r600_store_context_reg(cb, R_0286C4_SPI_VS_OUT_CONFIG, 3630af69d88dSmrg S_0286C4_VS_EXPORT_COUNT(nparams - 1)); 3631af69d88dSmrg r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS, 3632af69d88dSmrg S_028860_NUM_GPRS(rshader->bc.ngpr) | 363301e04c3fSmrg S_028860_DX10_CLAMP(1) | 3634af69d88dSmrg S_028860_STACK_SIZE(rshader->bc.nstack)); 3635af69d88dSmrg if (rshader->vs_position_window_space) { 3636af69d88dSmrg r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 3637af69d88dSmrg S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1)); 3638af69d88dSmrg } else { 3639af69d88dSmrg r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 3640af69d88dSmrg S_028818_VTX_W0_FMT(1) | 3641af69d88dSmrg S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | 3642af69d88dSmrg S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | 3643af69d88dSmrg S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); 3644af69d88dSmrg 3645af69d88dSmrg } 3646af69d88dSmrg r600_store_context_reg(cb, R_02885C_SQ_PGM_START_VS, 3647af69d88dSmrg shader->bo->gpu_address >> 8); 3648af69d88dSmrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). 
*/ 3649af69d88dSmrg 3650af69d88dSmrg shader->pa_cl_vs_out_cntl = 365101e04c3fSmrg S_02881C_VS_OUT_CCDIST0_VEC_ENA((rshader->cc_dist_mask & 0x0F) != 0) | 365201e04c3fSmrg S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->cc_dist_mask & 0xF0) != 0) | 3653af69d88dSmrg S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) | 3654af69d88dSmrg S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size) | 3655af69d88dSmrg S_02881C_USE_VTX_EDGE_FLAG(rshader->vs_out_edgeflag) | 3656af69d88dSmrg S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport) | 3657af69d88dSmrg S_02881C_USE_VTX_RENDER_TARGET_INDX(rshader->vs_out_layer); 3658af69d88dSmrg} 3659af69d88dSmrg 366001e04c3fSmrgvoid evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 366101e04c3fSmrg{ 366201e04c3fSmrg struct r600_command_buffer *cb = &shader->command_buffer; 366301e04c3fSmrg struct r600_shader *rshader = &shader->shader; 366401e04c3fSmrg 366501e04c3fSmrg r600_init_command_buffer(cb, 32); 366601e04c3fSmrg r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS, 366701e04c3fSmrg S_0288BC_NUM_GPRS(rshader->bc.ngpr) | 366801e04c3fSmrg S_0288BC_DX10_CLAMP(1) | 366901e04c3fSmrg S_0288BC_STACK_SIZE(rshader->bc.nstack)); 367001e04c3fSmrg r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS, 367101e04c3fSmrg shader->bo->gpu_address >> 8); 367201e04c3fSmrg} 367301e04c3fSmrg 367401e04c3fSmrgvoid evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 367501e04c3fSmrg{ 367601e04c3fSmrg struct r600_command_buffer *cb = &shader->command_buffer; 367701e04c3fSmrg struct r600_shader *rshader = &shader->shader; 367801e04c3fSmrg 367901e04c3fSmrg r600_init_command_buffer(cb, 32); 368001e04c3fSmrg r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS, 368101e04c3fSmrg S_0288D4_NUM_GPRS(rshader->bc.ngpr) | 368201e04c3fSmrg S_0288D4_DX10_CLAMP(1) | 368301e04c3fSmrg S_0288D4_STACK_SIZE(rshader->bc.nstack)); 368401e04c3fSmrg r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS, 
368501e04c3fSmrg shader->bo->gpu_address >> 8); 368601e04c3fSmrg} 3687af69d88dSmrgvoid *evergreen_create_resolve_blend(struct r600_context *rctx) 3688af69d88dSmrg{ 3689af69d88dSmrg struct pipe_blend_state blend; 3690af69d88dSmrg 3691af69d88dSmrg memset(&blend, 0, sizeof(blend)); 3692af69d88dSmrg blend.independent_blend_enable = true; 3693af69d88dSmrg blend.rt[0].colormask = 0xf; 3694af69d88dSmrg return evergreen_create_blend_state_mode(&rctx->b.b, &blend, V_028808_CB_RESOLVE); 3695af69d88dSmrg} 3696af69d88dSmrg 3697af69d88dSmrgvoid *evergreen_create_decompress_blend(struct r600_context *rctx) 3698af69d88dSmrg{ 3699af69d88dSmrg struct pipe_blend_state blend; 3700af69d88dSmrg unsigned mode = rctx->screen->has_compressed_msaa_texturing ? 3701af69d88dSmrg V_028808_CB_FMASK_DECOMPRESS : V_028808_CB_DECOMPRESS; 3702af69d88dSmrg 3703af69d88dSmrg memset(&blend, 0, sizeof(blend)); 3704af69d88dSmrg blend.independent_blend_enable = true; 3705af69d88dSmrg blend.rt[0].colormask = 0xf; 3706af69d88dSmrg return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode); 3707af69d88dSmrg} 3708af69d88dSmrg 3709af69d88dSmrgvoid *evergreen_create_fastclear_blend(struct r600_context *rctx) 3710af69d88dSmrg{ 3711af69d88dSmrg struct pipe_blend_state blend; 3712af69d88dSmrg unsigned mode = V_028808_CB_ELIMINATE_FAST_CLEAR; 3713af69d88dSmrg 3714af69d88dSmrg memset(&blend, 0, sizeof(blend)); 3715af69d88dSmrg blend.independent_blend_enable = true; 3716af69d88dSmrg blend.rt[0].colormask = 0xf; 3717af69d88dSmrg return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode); 3718af69d88dSmrg} 3719af69d88dSmrg 3720af69d88dSmrgvoid *evergreen_create_db_flush_dsa(struct r600_context *rctx) 3721af69d88dSmrg{ 37227ec681f3Smrg struct pipe_depth_stencil_alpha_state dsa = {{{0}}}; 3723af69d88dSmrg 3724af69d88dSmrg return rctx->b.b.create_depth_stencil_alpha_state(&rctx->b.b, &dsa); 3725af69d88dSmrg} 3726af69d88dSmrg 3727af69d88dSmrgvoid evergreen_update_db_shader_control(struct r600_context * 
rctx) 3728af69d88dSmrg{ 3729af69d88dSmrg bool dual_export; 3730af69d88dSmrg unsigned db_shader_control; 3731af69d88dSmrg 3732af69d88dSmrg if (!rctx->ps_shader) { 3733af69d88dSmrg return; 3734af69d88dSmrg } 3735af69d88dSmrg 3736af69d88dSmrg dual_export = rctx->framebuffer.export_16bpc && 3737af69d88dSmrg !rctx->ps_shader->current->ps_depth_export; 3738af69d88dSmrg 3739af69d88dSmrg db_shader_control = rctx->ps_shader->current->db_shader_control | 3740af69d88dSmrg S_02880C_DUAL_EXPORT_ENABLE(dual_export) | 3741af69d88dSmrg S_02880C_DB_SOURCE_FORMAT(dual_export ? V_02880C_EXPORT_DB_TWO : 3742af69d88dSmrg V_02880C_EXPORT_DB_FULL) | 3743af69d88dSmrg S_02880C_ALPHA_TO_MASK_DISABLE(rctx->framebuffer.cb0_is_integer); 3744af69d88dSmrg 3745af69d88dSmrg /* When alpha test is enabled we can't trust the hw to make the proper 3746af69d88dSmrg * decision on the order in which ztest should be run related to fragment 3747af69d88dSmrg * shader execution. 3748af69d88dSmrg * 3749af69d88dSmrg * If alpha test is enabled perform early z rejection (RE_Z) but don't early 3750af69d88dSmrg * write to the zbuffer. Write to zbuffer is delayed after fragment shader 3751af69d88dSmrg * execution and thus after alpha test so if discarded by the alpha test 3752af69d88dSmrg * the z value is not written. 3753af69d88dSmrg * If ReZ is enabled, and the zfunc/zenable/zwrite values change you can 3754af69d88dSmrg * get a hang unless you flush the DB in between. For now just use 3755af69d88dSmrg * LATE_Z. 
3756af69d88dSmrg */ 375701e04c3fSmrg if (rctx->alphatest_state.sx_alpha_test_control || rctx->ps_shader->info.writes_memory) { 3758af69d88dSmrg db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 3759af69d88dSmrg } else { 3760af69d88dSmrg db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); 3761af69d88dSmrg } 3762af69d88dSmrg 3763af69d88dSmrg if (db_shader_control != rctx->db_misc_state.db_shader_control) { 3764af69d88dSmrg rctx->db_misc_state.db_shader_control = db_shader_control; 376501e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 3766af69d88dSmrg } 3767af69d88dSmrg} 3768af69d88dSmrg 3769af69d88dSmrgstatic void evergreen_dma_copy_tile(struct r600_context *rctx, 3770af69d88dSmrg struct pipe_resource *dst, 3771af69d88dSmrg unsigned dst_level, 3772af69d88dSmrg unsigned dst_x, 3773af69d88dSmrg unsigned dst_y, 3774af69d88dSmrg unsigned dst_z, 3775af69d88dSmrg struct pipe_resource *src, 3776af69d88dSmrg unsigned src_level, 3777af69d88dSmrg unsigned src_x, 3778af69d88dSmrg unsigned src_y, 3779af69d88dSmrg unsigned src_z, 3780af69d88dSmrg unsigned copy_height, 3781af69d88dSmrg unsigned pitch, 3782af69d88dSmrg unsigned bpp) 3783af69d88dSmrg{ 37847ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.dma.cs; 3785af69d88dSmrg struct r600_texture *rsrc = (struct r600_texture*)src; 3786af69d88dSmrg struct r600_texture *rdst = (struct r600_texture*)dst; 3787af69d88dSmrg unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; 3788af69d88dSmrg unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode; 3789af69d88dSmrg unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split, non_disp_tiling = 0; 3790af69d88dSmrg uint64_t base, addr; 3791af69d88dSmrg 379201e04c3fSmrg dst_mode = rdst->surface.u.legacy.level[dst_level].mode; 379301e04c3fSmrg src_mode = rsrc->surface.u.legacy.level[src_level].mode; 3794af69d88dSmrg assert(dst_mode != src_mode); 3795af69d88dSmrg 3796af69d88dSmrg /* non_disp_tiling bit needs to be set for 
depth, stencil, and fmask surfaces */ 3797af69d88dSmrg if (util_format_has_depth(util_format_description(src->format))) 3798af69d88dSmrg non_disp_tiling = 1; 3799af69d88dSmrg 3800af69d88dSmrg y = 0; 3801af69d88dSmrg sub_cmd = EG_DMA_COPY_TILED; 3802af69d88dSmrg lbpp = util_logbase2(bpp); 3803af69d88dSmrg pitch_tile_max = ((pitch / bpp) / 8) - 1; 380401e04c3fSmrg nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks); 3805af69d88dSmrg 380601e04c3fSmrg if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED) { 3807af69d88dSmrg /* T2L */ 3808af69d88dSmrg array_mode = evergreen_array_mode(src_mode); 380901e04c3fSmrg slice_tile_max = (rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.u.legacy.level[src_level].nblk_y) / (8*8); 3810af69d88dSmrg slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; 3811af69d88dSmrg /* linear height must be the same as the slice tile max height, it's ok even 3812af69d88dSmrg * if the linear destination/source have smaller height as the size of the 3813af69d88dSmrg * dma packet will be using the copy_height which is always smaller or equal 3814af69d88dSmrg * to the linear height 3815af69d88dSmrg */ 381601e04c3fSmrg height = u_minify(rsrc->resource.b.b.height0, src_level); 3817af69d88dSmrg detile = 1; 3818af69d88dSmrg x = src_x; 3819af69d88dSmrg y = src_y; 3820af69d88dSmrg z = src_z; 38217ec681f3Smrg base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; 38227ec681f3Smrg addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; 382301e04c3fSmrg addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; 3824af69d88dSmrg addr += dst_y * pitch + dst_x * bpp; 382501e04c3fSmrg bank_h = eg_bank_wh(rsrc->surface.u.legacy.bankh); 382601e04c3fSmrg bank_w = eg_bank_wh(rsrc->surface.u.legacy.bankw); 382701e04c3fSmrg mt_aspect = eg_macro_tile_aspect(rsrc->surface.u.legacy.mtilea); 382801e04c3fSmrg tile_split = eg_tile_split(rsrc->surface.u.legacy.tile_split); 3829af69d88dSmrg 
base += rsrc->resource.gpu_address; 3830af69d88dSmrg addr += rdst->resource.gpu_address; 3831af69d88dSmrg } else { 3832af69d88dSmrg /* L2T */ 3833af69d88dSmrg array_mode = evergreen_array_mode(dst_mode); 383401e04c3fSmrg slice_tile_max = (rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.u.legacy.level[dst_level].nblk_y) / (8*8); 3835af69d88dSmrg slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; 3836af69d88dSmrg /* linear height must be the same as the slice tile max height, it's ok even 3837af69d88dSmrg * if the linear destination/source have smaller height as the size of the 3838af69d88dSmrg * dma packet will be using the copy_height which is always smaller or equal 3839af69d88dSmrg * to the linear height 3840af69d88dSmrg */ 384101e04c3fSmrg height = u_minify(rdst->resource.b.b.height0, dst_level); 3842af69d88dSmrg detile = 0; 3843af69d88dSmrg x = dst_x; 3844af69d88dSmrg y = dst_y; 3845af69d88dSmrg z = dst_z; 38467ec681f3Smrg base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; 38477ec681f3Smrg addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; 384801e04c3fSmrg addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z; 3849af69d88dSmrg addr += src_y * pitch + src_x * bpp; 385001e04c3fSmrg bank_h = eg_bank_wh(rdst->surface.u.legacy.bankh); 385101e04c3fSmrg bank_w = eg_bank_wh(rdst->surface.u.legacy.bankw); 385201e04c3fSmrg mt_aspect = eg_macro_tile_aspect(rdst->surface.u.legacy.mtilea); 385301e04c3fSmrg tile_split = eg_tile_split(rdst->surface.u.legacy.tile_split); 3854af69d88dSmrg base += rdst->resource.gpu_address; 3855af69d88dSmrg addr += rsrc->resource.gpu_address; 3856af69d88dSmrg } 3857af69d88dSmrg 3858af69d88dSmrg size = (copy_height * pitch) / 4; 3859af69d88dSmrg ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE); 386001e04c3fSmrg r600_need_dma_space(&rctx->b, ncopy * 9, &rdst->resource, &rsrc->resource); 3861af69d88dSmrg 3862af69d88dSmrg for 
(i = 0; i < ncopy; i++) { 3863af69d88dSmrg cheight = copy_height; 3864af69d88dSmrg if (((cheight * pitch) / 4) > EG_DMA_COPY_MAX_SIZE) { 3865af69d88dSmrg cheight = (EG_DMA_COPY_MAX_SIZE * 4) / pitch; 3866af69d88dSmrg } 3867af69d88dSmrg size = (cheight * pitch) / 4; 386801e04c3fSmrg /* emit reloc before writing cs so that cs is always in consistent state */ 386901e04c3fSmrg radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, 387001e04c3fSmrg RADEON_USAGE_READ, 0); 387101e04c3fSmrg radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, 387201e04c3fSmrg RADEON_USAGE_WRITE, 0); 387301e04c3fSmrg radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size)); 387401e04c3fSmrg radeon_emit(cs, base >> 8); 387501e04c3fSmrg radeon_emit(cs, (detile << 31) | (array_mode << 27) | 387601e04c3fSmrg (lbpp << 24) | (bank_h << 21) | 387701e04c3fSmrg (bank_w << 18) | (mt_aspect << 16)); 387801e04c3fSmrg radeon_emit(cs, (pitch_tile_max << 0) | ((height - 1) << 16)); 387901e04c3fSmrg radeon_emit(cs, (slice_tile_max << 0)); 388001e04c3fSmrg radeon_emit(cs, (x << 0) | (z << 18)); 388101e04c3fSmrg radeon_emit(cs, (y << 0) | (tile_split << 21) | (nbanks << 25) | (non_disp_tiling << 28)); 388201e04c3fSmrg radeon_emit(cs, addr & 0xfffffffc); 388301e04c3fSmrg radeon_emit(cs, (addr >> 32UL) & 0xff); 3884af69d88dSmrg copy_height -= cheight; 3885af69d88dSmrg addr += cheight * pitch; 3886af69d88dSmrg y += cheight; 3887af69d88dSmrg } 3888af69d88dSmrg} 3889af69d88dSmrg 3890af69d88dSmrgstatic void evergreen_dma_copy(struct pipe_context *ctx, 3891af69d88dSmrg struct pipe_resource *dst, 3892af69d88dSmrg unsigned dst_level, 3893af69d88dSmrg unsigned dstx, unsigned dsty, unsigned dstz, 3894af69d88dSmrg struct pipe_resource *src, 3895af69d88dSmrg unsigned src_level, 3896af69d88dSmrg const struct pipe_box *src_box) 3897af69d88dSmrg{ 3898af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 3899af69d88dSmrg struct r600_texture *rsrc = (struct r600_texture*)src; 
3900af69d88dSmrg struct r600_texture *rdst = (struct r600_texture*)dst; 3901af69d88dSmrg unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height; 3902af69d88dSmrg unsigned src_w, dst_w; 3903af69d88dSmrg unsigned src_x, src_y; 3904af69d88dSmrg unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; 3905af69d88dSmrg 39067ec681f3Smrg if (rctx->b.dma.cs.priv == NULL) { 3907af69d88dSmrg goto fallback; 3908af69d88dSmrg } 3909af69d88dSmrg 391001e04c3fSmrg if (rctx->cmd_buf_is_compute) { 391101e04c3fSmrg rctx->b.gfx.flush(rctx, PIPE_FLUSH_ASYNC, NULL); 391201e04c3fSmrg rctx->cmd_buf_is_compute = false; 391301e04c3fSmrg } 391401e04c3fSmrg 3915af69d88dSmrg if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { 3916af69d88dSmrg evergreen_dma_copy_buffer(rctx, dst, src, dst_x, src_box->x, src_box->width); 3917af69d88dSmrg return; 3918af69d88dSmrg } 3919af69d88dSmrg 392001e04c3fSmrg if (src_box->depth > 1 || 392101e04c3fSmrg !r600_prepare_for_dma_blit(&rctx->b, rdst, dst_level, dstx, dsty, 392201e04c3fSmrg dstz, rsrc, src_level, src_box)) 3923af69d88dSmrg goto fallback; 3924af69d88dSmrg 3925af69d88dSmrg src_x = util_format_get_nblocksx(src->format, src_box->x); 3926af69d88dSmrg dst_x = util_format_get_nblocksx(src->format, dst_x); 3927af69d88dSmrg src_y = util_format_get_nblocksy(src->format, src_box->y); 3928af69d88dSmrg dst_y = util_format_get_nblocksy(src->format, dst_y); 3929af69d88dSmrg 3930af69d88dSmrg bpp = rdst->surface.bpe; 393101e04c3fSmrg dst_pitch = rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.bpe; 393201e04c3fSmrg src_pitch = rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.bpe; 393301e04c3fSmrg src_w = u_minify(rsrc->resource.b.b.width0, src_level); 393401e04c3fSmrg dst_w = u_minify(rdst->resource.b.b.width0, dst_level); 3935af69d88dSmrg copy_height = src_box->height / rsrc->surface.blk_h; 3936af69d88dSmrg 393701e04c3fSmrg dst_mode = rdst->surface.u.legacy.level[dst_level].mode; 393801e04c3fSmrg src_mode = 
rsrc->surface.u.legacy.level[src_level].mode; 3939af69d88dSmrg 3940af69d88dSmrg if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { 3941af69d88dSmrg /* FIXME evergreen can do partial blit */ 3942af69d88dSmrg goto fallback; 3943af69d88dSmrg } 3944af69d88dSmrg /* the x test here are currently useless (because we don't support partial blit) 3945af69d88dSmrg * but keep them around so we don't forget about those 3946af69d88dSmrg */ 3947af69d88dSmrg if (src_pitch % 8 || src_box->x % 8 || dst_x % 8 || src_box->y % 8 || dst_y % 8) { 3948af69d88dSmrg goto fallback; 3949af69d88dSmrg } 3950af69d88dSmrg 3951af69d88dSmrg /* 128 bpp surfaces require non_disp_tiling for both 3952af69d88dSmrg * tiled and linear buffers on cayman. However, async 3953af69d88dSmrg * DMA only supports it on the tiled side. As such 3954af69d88dSmrg * the tile order is backwards after a L2T/T2L packet. 3955af69d88dSmrg */ 3956af69d88dSmrg if ((rctx->b.chip_class == CAYMAN) && 3957af69d88dSmrg (src_mode != dst_mode) && 3958af69d88dSmrg (util_format_get_blocksize(src->format) >= 16)) { 3959af69d88dSmrg goto fallback; 3960af69d88dSmrg } 3961af69d88dSmrg 3962af69d88dSmrg if (src_mode == dst_mode) { 3963af69d88dSmrg uint64_t dst_offset, src_offset; 3964af69d88dSmrg /* simple dma blit would do NOTE code here assume : 3965af69d88dSmrg * src_box.x/y == 0 3966af69d88dSmrg * dst_x/y == 0 3967af69d88dSmrg * dst_pitch == src_pitch 3968af69d88dSmrg */ 39697ec681f3Smrg src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; 397001e04c3fSmrg src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z; 3971af69d88dSmrg src_offset += src_y * src_pitch + src_x * bpp; 39727ec681f3Smrg dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; 397301e04c3fSmrg dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; 3974af69d88dSmrg dst_offset += dst_y * dst_pitch + dst_x * bpp; 
3975af69d88dSmrg evergreen_dma_copy_buffer(rctx, dst, src, dst_offset, src_offset, 3976af69d88dSmrg src_box->height * src_pitch); 3977af69d88dSmrg } else { 3978af69d88dSmrg evergreen_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z, 3979af69d88dSmrg src, src_level, src_x, src_y, src_box->z, 3980af69d88dSmrg copy_height, dst_pitch, bpp); 3981af69d88dSmrg } 3982af69d88dSmrg return; 3983af69d88dSmrg 3984af69d88dSmrgfallback: 398501e04c3fSmrg r600_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, 3986af69d88dSmrg src, src_level, src_box); 3987af69d88dSmrg} 3988af69d88dSmrg 398901e04c3fSmrgstatic void evergreen_set_tess_state(struct pipe_context *ctx, 399001e04c3fSmrg const float default_outer_level[4], 399101e04c3fSmrg const float default_inner_level[2]) 399201e04c3fSmrg{ 399301e04c3fSmrg struct r600_context *rctx = (struct r600_context *)ctx; 399401e04c3fSmrg 399501e04c3fSmrg memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4); 399601e04c3fSmrg memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2); 399701e04c3fSmrg rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = true; 399801e04c3fSmrg} 399901e04c3fSmrg 40007ec681f3Smrgstatic void evergreen_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices) 40017ec681f3Smrg{ 40027ec681f3Smrg struct r600_context *rctx = (struct r600_context *)ctx; 40037ec681f3Smrg 40047ec681f3Smrg rctx->patch_vertices = patch_vertices; 40057ec681f3Smrg} 40067ec681f3Smrg 400701e04c3fSmrgstatic void evergreen_setup_immed_buffer(struct r600_context *rctx, 400801e04c3fSmrg struct r600_image_view *rview, 400901e04c3fSmrg enum pipe_format pformat) 401001e04c3fSmrg{ 401101e04c3fSmrg struct r600_screen *rscreen = (struct r600_screen *)rctx->b.b.screen; 401201e04c3fSmrg uint32_t immed_size = rscreen->b.info.max_se * 256 * 64 * util_format_get_blocksize(pformat); 401301e04c3fSmrg struct eg_buf_res_params buf_params; 401401e04c3fSmrg bool skip_reloc = false; 401501e04c3fSmrg struct 
r600_resource *resource = (struct r600_resource *)rview->base.resource; 401601e04c3fSmrg if (!resource->immed_buffer) { 401701e04c3fSmrg eg_resource_alloc_immed(&rscreen->b, resource, immed_size); 401801e04c3fSmrg } 401901e04c3fSmrg 402001e04c3fSmrg memset(&buf_params, 0, sizeof(buf_params)); 402101e04c3fSmrg buf_params.pipe_format = pformat; 402201e04c3fSmrg buf_params.size = resource->immed_buffer->b.b.width0; 402301e04c3fSmrg buf_params.swizzle[0] = PIPE_SWIZZLE_X; 402401e04c3fSmrg buf_params.swizzle[1] = PIPE_SWIZZLE_Y; 402501e04c3fSmrg buf_params.swizzle[2] = PIPE_SWIZZLE_Z; 402601e04c3fSmrg buf_params.swizzle[3] = PIPE_SWIZZLE_W; 402701e04c3fSmrg buf_params.uncached = 1; 402801e04c3fSmrg evergreen_fill_buffer_resource_words(rctx, &resource->immed_buffer->b.b, 402901e04c3fSmrg &buf_params, &skip_reloc, 403001e04c3fSmrg rview->immed_resource_words); 403101e04c3fSmrg} 403201e04c3fSmrg 403301e04c3fSmrgstatic void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, 403401e04c3fSmrg unsigned start_slot, 403501e04c3fSmrg unsigned count, 403601e04c3fSmrg const struct pipe_shader_buffer *buffers) 403701e04c3fSmrg{ 403801e04c3fSmrg struct r600_context *rctx = (struct r600_context *)ctx; 403901e04c3fSmrg struct r600_atomic_buffer_state *astate; 404001e04c3fSmrg unsigned i, idx; 404101e04c3fSmrg 404201e04c3fSmrg astate = &rctx->atomic_buffer_state; 404301e04c3fSmrg 404401e04c3fSmrg /* we'd probably like to expand this to 8 later so put the logic in */ 404501e04c3fSmrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 404601e04c3fSmrg const struct pipe_shader_buffer *buf; 404701e04c3fSmrg struct pipe_shader_buffer *abuf; 404801e04c3fSmrg 404901e04c3fSmrg abuf = &astate->buffer[i]; 405001e04c3fSmrg 405101e04c3fSmrg if (!buffers || !buffers[idx].buffer) { 405201e04c3fSmrg pipe_resource_reference(&abuf->buffer, NULL); 405301e04c3fSmrg continue; 405401e04c3fSmrg } 405501e04c3fSmrg buf = &buffers[idx]; 405601e04c3fSmrg 405701e04c3fSmrg 
pipe_resource_reference(&abuf->buffer, buf->buffer); 405801e04c3fSmrg abuf->buffer_offset = buf->buffer_offset; 405901e04c3fSmrg abuf->buffer_size = buf->buffer_size; 406001e04c3fSmrg } 406101e04c3fSmrg} 406201e04c3fSmrg 406301e04c3fSmrgstatic void evergreen_set_shader_buffers(struct pipe_context *ctx, 406401e04c3fSmrg enum pipe_shader_type shader, unsigned start_slot, 406501e04c3fSmrg unsigned count, 40669f464c52Smaya const struct pipe_shader_buffer *buffers, 40679f464c52Smaya unsigned writable_bitmask) 406801e04c3fSmrg{ 406901e04c3fSmrg struct r600_context *rctx = (struct r600_context *)ctx; 407001e04c3fSmrg struct r600_image_state *istate = NULL; 407101e04c3fSmrg struct r600_image_view *rview; 407201e04c3fSmrg struct r600_tex_color_info color; 407301e04c3fSmrg struct eg_buf_res_params buf_params; 407401e04c3fSmrg struct r600_resource *resource; 407501e04c3fSmrg unsigned i, idx; 407601e04c3fSmrg unsigned old_mask; 407701e04c3fSmrg 407801e04c3fSmrg if (shader != PIPE_SHADER_FRAGMENT && 407901e04c3fSmrg shader != PIPE_SHADER_COMPUTE && count == 0) 408001e04c3fSmrg return; 408101e04c3fSmrg 408201e04c3fSmrg if (shader == PIPE_SHADER_FRAGMENT) 408301e04c3fSmrg istate = &rctx->fragment_buffers; 408401e04c3fSmrg else if (shader == PIPE_SHADER_COMPUTE) 408501e04c3fSmrg istate = &rctx->compute_buffers; 408601e04c3fSmrg 408701e04c3fSmrg old_mask = istate->enabled_mask; 408801e04c3fSmrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 408901e04c3fSmrg const struct pipe_shader_buffer *buf; 409001e04c3fSmrg unsigned res_type; 409101e04c3fSmrg 409201e04c3fSmrg rview = &istate->views[i]; 409301e04c3fSmrg 409401e04c3fSmrg if (!buffers || !buffers[idx].buffer) { 409501e04c3fSmrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL); 409601e04c3fSmrg istate->enabled_mask &= ~(1 << i); 409701e04c3fSmrg continue; 409801e04c3fSmrg } 409901e04c3fSmrg 410001e04c3fSmrg buf = &buffers[idx]; 410101e04c3fSmrg pipe_resource_reference((struct 
pipe_resource **)&rview->base.resource, buf->buffer); 410201e04c3fSmrg 410301e04c3fSmrg resource = (struct r600_resource *)rview->base.resource; 410401e04c3fSmrg 410501e04c3fSmrg evergreen_setup_immed_buffer(rctx, rview, PIPE_FORMAT_R32_UINT); 410601e04c3fSmrg 410701e04c3fSmrg color.offset = 0; 410801e04c3fSmrg color.view = 0; 410901e04c3fSmrg evergreen_set_color_surface_buffer(rctx, resource, 411001e04c3fSmrg PIPE_FORMAT_R32_UINT, 411101e04c3fSmrg buf->buffer_offset, 411201e04c3fSmrg buf->buffer_offset + buf->buffer_size, 411301e04c3fSmrg &color); 411401e04c3fSmrg 411501e04c3fSmrg res_type = V_028C70_BUFFER; 411601e04c3fSmrg 411701e04c3fSmrg rview->cb_color_base = color.offset; 411801e04c3fSmrg rview->cb_color_dim = color.dim; 411901e04c3fSmrg rview->cb_color_info = color.info | 412001e04c3fSmrg S_028C70_RAT(1) | 412101e04c3fSmrg S_028C70_RESOURCE_TYPE(res_type); 412201e04c3fSmrg rview->cb_color_pitch = color.pitch; 412301e04c3fSmrg rview->cb_color_slice = color.slice; 412401e04c3fSmrg rview->cb_color_view = color.view; 412501e04c3fSmrg rview->cb_color_attrib = color.attrib; 412601e04c3fSmrg rview->cb_color_fmask = color.fmask; 412701e04c3fSmrg rview->cb_color_fmask_slice = color.fmask_slice; 412801e04c3fSmrg 412901e04c3fSmrg memset(&buf_params, 0, sizeof(buf_params)); 413001e04c3fSmrg buf_params.pipe_format = PIPE_FORMAT_R32_UINT; 413101e04c3fSmrg buf_params.offset = buf->buffer_offset; 413201e04c3fSmrg buf_params.size = buf->buffer_size; 413301e04c3fSmrg buf_params.swizzle[0] = PIPE_SWIZZLE_X; 413401e04c3fSmrg buf_params.swizzle[1] = PIPE_SWIZZLE_Y; 413501e04c3fSmrg buf_params.swizzle[2] = PIPE_SWIZZLE_Z; 413601e04c3fSmrg buf_params.swizzle[3] = PIPE_SWIZZLE_W; 413701e04c3fSmrg buf_params.force_swizzle = true; 413801e04c3fSmrg buf_params.uncached = 1; 413901e04c3fSmrg buf_params.size_in_bytes = true; 414001e04c3fSmrg evergreen_fill_buffer_resource_words(rctx, &resource->b.b, 414101e04c3fSmrg &buf_params, 414201e04c3fSmrg &rview->skip_mip_address_reloc, 
414301e04c3fSmrg rview->resource_words); 414401e04c3fSmrg 414501e04c3fSmrg istate->enabled_mask |= (1 << i); 414601e04c3fSmrg } 414701e04c3fSmrg 414801e04c3fSmrg istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46; 414901e04c3fSmrg 415001e04c3fSmrg if (old_mask != istate->enabled_mask) 415101e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 415201e04c3fSmrg 415301e04c3fSmrg /* construct the target mask */ 415401e04c3fSmrg if (rctx->cb_misc_state.buffer_rat_enabled_mask != istate->enabled_mask) { 415501e04c3fSmrg rctx->cb_misc_state.buffer_rat_enabled_mask = istate->enabled_mask; 415601e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 415701e04c3fSmrg } 415801e04c3fSmrg 415901e04c3fSmrg if (shader == PIPE_SHADER_FRAGMENT) 416001e04c3fSmrg r600_mark_atom_dirty(rctx, &istate->atom); 416101e04c3fSmrg} 416201e04c3fSmrg 416301e04c3fSmrgstatic void evergreen_set_shader_images(struct pipe_context *ctx, 416401e04c3fSmrg enum pipe_shader_type shader, unsigned start_slot, 41657ec681f3Smrg unsigned count, unsigned unbind_num_trailing_slots, 416601e04c3fSmrg const struct pipe_image_view *images) 416701e04c3fSmrg{ 416801e04c3fSmrg struct r600_context *rctx = (struct r600_context *)ctx; 416901e04c3fSmrg unsigned i; 417001e04c3fSmrg struct r600_image_view *rview; 417101e04c3fSmrg struct pipe_resource *image; 417201e04c3fSmrg struct r600_resource *resource; 417301e04c3fSmrg struct r600_tex_color_info color; 417401e04c3fSmrg struct eg_buf_res_params buf_params; 417501e04c3fSmrg struct eg_tex_res_params tex_params; 417601e04c3fSmrg unsigned old_mask; 417701e04c3fSmrg struct r600_image_state *istate = NULL; 417801e04c3fSmrg int idx; 41797ec681f3Smrg if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE) 41807ec681f3Smrg return; 41817ec681f3Smrg if (!count && !unbind_num_trailing_slots) 418201e04c3fSmrg return; 418301e04c3fSmrg 418401e04c3fSmrg if (shader == PIPE_SHADER_FRAGMENT) 418501e04c3fSmrg istate = &rctx->fragment_images; 
418601e04c3fSmrg else if (shader == PIPE_SHADER_COMPUTE) 418701e04c3fSmrg istate = &rctx->compute_images; 418801e04c3fSmrg 418901e04c3fSmrg assert (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE); 419001e04c3fSmrg 419101e04c3fSmrg old_mask = istate->enabled_mask; 419201e04c3fSmrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 419301e04c3fSmrg unsigned res_type; 419401e04c3fSmrg const struct pipe_image_view *iview; 419501e04c3fSmrg rview = &istate->views[i]; 419601e04c3fSmrg 419701e04c3fSmrg if (!images || !images[idx].resource) { 419801e04c3fSmrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL); 419901e04c3fSmrg istate->enabled_mask &= ~(1 << i); 420001e04c3fSmrg istate->compressed_colortex_mask &= ~(1 << i); 420101e04c3fSmrg istate->compressed_depthtex_mask &= ~(1 << i); 420201e04c3fSmrg continue; 420301e04c3fSmrg } 420401e04c3fSmrg 420501e04c3fSmrg iview = &images[idx]; 420601e04c3fSmrg image = iview->resource; 420701e04c3fSmrg resource = (struct r600_resource *)image; 420801e04c3fSmrg 420901e04c3fSmrg r600_context_add_resource_size(ctx, image); 421001e04c3fSmrg 421101e04c3fSmrg rview->base = *iview; 421201e04c3fSmrg rview->base.resource = NULL; 421301e04c3fSmrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, image); 421401e04c3fSmrg 421501e04c3fSmrg evergreen_setup_immed_buffer(rctx, rview, iview->format); 421601e04c3fSmrg 421701e04c3fSmrg bool is_buffer = image->target == PIPE_BUFFER; 421801e04c3fSmrg struct r600_texture *rtex = (struct r600_texture *)image; 421901e04c3fSmrg if (!is_buffer & rtex->db_compatible) 422001e04c3fSmrg istate->compressed_depthtex_mask |= 1 << i; 422101e04c3fSmrg else 422201e04c3fSmrg istate->compressed_depthtex_mask &= ~(1 << i); 422301e04c3fSmrg 422401e04c3fSmrg if (!is_buffer && rtex->cmask.size) 422501e04c3fSmrg istate->compressed_colortex_mask |= 1 << i; 422601e04c3fSmrg else 422701e04c3fSmrg istate->compressed_colortex_mask &= ~(1 << i); 
422801e04c3fSmrg if (!is_buffer) { 422901e04c3fSmrg 423001e04c3fSmrg evergreen_set_color_surface_common(rctx, rtex, 423101e04c3fSmrg iview->u.tex.level, 423201e04c3fSmrg iview->u.tex.first_layer, 423301e04c3fSmrg iview->u.tex.last_layer, 423401e04c3fSmrg iview->format, 423501e04c3fSmrg &color); 423601e04c3fSmrg color.dim = S_028C78_WIDTH_MAX(u_minify(image->width0, iview->u.tex.level) - 1) | 423701e04c3fSmrg S_028C78_HEIGHT_MAX(u_minify(image->height0, iview->u.tex.level) - 1); 423801e04c3fSmrg } else { 423901e04c3fSmrg color.offset = 0; 424001e04c3fSmrg color.view = 0; 424101e04c3fSmrg evergreen_set_color_surface_buffer(rctx, resource, 424201e04c3fSmrg iview->format, 424301e04c3fSmrg iview->u.buf.offset, 424401e04c3fSmrg iview->u.buf.size, 424501e04c3fSmrg &color); 424601e04c3fSmrg } 424701e04c3fSmrg 424801e04c3fSmrg switch (image->target) { 424901e04c3fSmrg case PIPE_BUFFER: 425001e04c3fSmrg res_type = V_028C70_BUFFER; 425101e04c3fSmrg break; 425201e04c3fSmrg case PIPE_TEXTURE_1D: 425301e04c3fSmrg res_type = V_028C70_TEXTURE1D; 425401e04c3fSmrg break; 425501e04c3fSmrg case PIPE_TEXTURE_1D_ARRAY: 425601e04c3fSmrg res_type = V_028C70_TEXTURE1DARRAY; 425701e04c3fSmrg break; 425801e04c3fSmrg case PIPE_TEXTURE_2D: 425901e04c3fSmrg case PIPE_TEXTURE_RECT: 426001e04c3fSmrg res_type = V_028C70_TEXTURE2D; 426101e04c3fSmrg break; 426201e04c3fSmrg case PIPE_TEXTURE_3D: 426301e04c3fSmrg res_type = V_028C70_TEXTURE3D; 426401e04c3fSmrg break; 426501e04c3fSmrg case PIPE_TEXTURE_2D_ARRAY: 426601e04c3fSmrg case PIPE_TEXTURE_CUBE: 426701e04c3fSmrg case PIPE_TEXTURE_CUBE_ARRAY: 426801e04c3fSmrg res_type = V_028C70_TEXTURE2DARRAY; 426901e04c3fSmrg break; 427001e04c3fSmrg default: 427101e04c3fSmrg assert(0); 427201e04c3fSmrg res_type = 0; 427301e04c3fSmrg break; 427401e04c3fSmrg } 427501e04c3fSmrg 427601e04c3fSmrg rview->cb_color_base = color.offset; 427701e04c3fSmrg rview->cb_color_dim = color.dim; 427801e04c3fSmrg rview->cb_color_info = color.info | 427901e04c3fSmrg S_028C70_RAT(1) 
| 428001e04c3fSmrg S_028C70_RESOURCE_TYPE(res_type); 428101e04c3fSmrg rview->cb_color_pitch = color.pitch; 428201e04c3fSmrg rview->cb_color_slice = color.slice; 428301e04c3fSmrg rview->cb_color_view = color.view; 428401e04c3fSmrg rview->cb_color_attrib = color.attrib; 428501e04c3fSmrg rview->cb_color_fmask = color.fmask; 428601e04c3fSmrg rview->cb_color_fmask_slice = color.fmask_slice; 428701e04c3fSmrg 428801e04c3fSmrg if (image->target != PIPE_BUFFER) { 428901e04c3fSmrg memset(&tex_params, 0, sizeof(tex_params)); 429001e04c3fSmrg tex_params.pipe_format = iview->format; 429101e04c3fSmrg tex_params.force_level = 0; 429201e04c3fSmrg tex_params.width0 = image->width0; 429301e04c3fSmrg tex_params.height0 = image->height0; 429401e04c3fSmrg tex_params.first_level = iview->u.tex.level; 429501e04c3fSmrg tex_params.last_level = iview->u.tex.level; 429601e04c3fSmrg tex_params.first_layer = iview->u.tex.first_layer; 429701e04c3fSmrg tex_params.last_layer = iview->u.tex.last_layer; 429801e04c3fSmrg tex_params.target = image->target; 429901e04c3fSmrg tex_params.swizzle[0] = PIPE_SWIZZLE_X; 430001e04c3fSmrg tex_params.swizzle[1] = PIPE_SWIZZLE_Y; 430101e04c3fSmrg tex_params.swizzle[2] = PIPE_SWIZZLE_Z; 430201e04c3fSmrg tex_params.swizzle[3] = PIPE_SWIZZLE_W; 430301e04c3fSmrg evergreen_fill_tex_resource_words(rctx, &resource->b.b, &tex_params, 430401e04c3fSmrg &rview->skip_mip_address_reloc, 430501e04c3fSmrg rview->resource_words); 430601e04c3fSmrg 430701e04c3fSmrg } else { 430801e04c3fSmrg memset(&buf_params, 0, sizeof(buf_params)); 430901e04c3fSmrg buf_params.pipe_format = iview->format; 431001e04c3fSmrg buf_params.size = iview->u.buf.size; 431101e04c3fSmrg buf_params.offset = iview->u.buf.offset; 431201e04c3fSmrg buf_params.swizzle[0] = PIPE_SWIZZLE_X; 431301e04c3fSmrg buf_params.swizzle[1] = PIPE_SWIZZLE_Y; 431401e04c3fSmrg buf_params.swizzle[2] = PIPE_SWIZZLE_Z; 431501e04c3fSmrg buf_params.swizzle[3] = PIPE_SWIZZLE_W; 431601e04c3fSmrg 
evergreen_fill_buffer_resource_words(rctx, &resource->b.b, 431701e04c3fSmrg &buf_params, 431801e04c3fSmrg &rview->skip_mip_address_reloc, 431901e04c3fSmrg rview->resource_words); 432001e04c3fSmrg } 432101e04c3fSmrg istate->enabled_mask |= (1 << i); 432201e04c3fSmrg } 432301e04c3fSmrg 43247ec681f3Smrg for (i = start_slot + count, idx = 0; 43257ec681f3Smrg i < start_slot + count + unbind_num_trailing_slots; i++, idx++) { 43267ec681f3Smrg rview = &istate->views[i]; 43277ec681f3Smrg 43287ec681f3Smrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL); 43297ec681f3Smrg istate->enabled_mask &= ~(1 << i); 43307ec681f3Smrg istate->compressed_colortex_mask &= ~(1 << i); 43317ec681f3Smrg istate->compressed_depthtex_mask &= ~(1 << i); 43327ec681f3Smrg } 43337ec681f3Smrg 433401e04c3fSmrg istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46; 433501e04c3fSmrg istate->dirty_buffer_constants = TRUE; 433601e04c3fSmrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; 433701e04c3fSmrg rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | 433801e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_CB_META; 433901e04c3fSmrg 434001e04c3fSmrg if (old_mask != istate->enabled_mask) 434101e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 434201e04c3fSmrg 434301e04c3fSmrg if (rctx->cb_misc_state.image_rat_enabled_mask != istate->enabled_mask) { 434401e04c3fSmrg rctx->cb_misc_state.image_rat_enabled_mask = istate->enabled_mask; 434501e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 434601e04c3fSmrg } 434701e04c3fSmrg 434801e04c3fSmrg if (shader == PIPE_SHADER_FRAGMENT) 434901e04c3fSmrg r600_mark_atom_dirty(rctx, &istate->atom); 435001e04c3fSmrg} 435101e04c3fSmrg 435201e04c3fSmrgstatic void evergreen_get_pipe_constant_buffer(struct r600_context *rctx, 435301e04c3fSmrg enum pipe_shader_type shader, uint slot, 435401e04c3fSmrg struct pipe_constant_buffer *cbuf) 435501e04c3fSmrg{ 435601e04c3fSmrg struct r600_constbuf_state 
*state = &rctx->constbuf_state[shader]; 435701e04c3fSmrg struct pipe_constant_buffer *cb; 435801e04c3fSmrg cbuf->user_buffer = NULL; 435901e04c3fSmrg 436001e04c3fSmrg cb = &state->cb[slot]; 436101e04c3fSmrg 436201e04c3fSmrg cbuf->buffer_size = cb->buffer_size; 436301e04c3fSmrg pipe_resource_reference(&cbuf->buffer, cb->buffer); 436401e04c3fSmrg} 436501e04c3fSmrg 436601e04c3fSmrgstatic void evergreen_get_shader_buffers(struct r600_context *rctx, 436701e04c3fSmrg enum pipe_shader_type shader, 436801e04c3fSmrg uint start_slot, uint count, 436901e04c3fSmrg struct pipe_shader_buffer *sbuf) 437001e04c3fSmrg{ 437101e04c3fSmrg assert(shader == PIPE_SHADER_COMPUTE); 437201e04c3fSmrg int idx, i; 437301e04c3fSmrg struct r600_image_state *istate = &rctx->compute_buffers; 437401e04c3fSmrg struct r600_image_view *rview; 437501e04c3fSmrg 437601e04c3fSmrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 437701e04c3fSmrg 437801e04c3fSmrg rview = &istate->views[i]; 437901e04c3fSmrg 438001e04c3fSmrg pipe_resource_reference(&sbuf[idx].buffer, rview->base.resource); 438101e04c3fSmrg if (rview->base.resource) { 438201e04c3fSmrg uint64_t rview_va = ((struct r600_resource *)rview->base.resource)->gpu_address; 438301e04c3fSmrg 438401e04c3fSmrg uint64_t prog_va = rview->resource_words[0]; 438501e04c3fSmrg 438601e04c3fSmrg prog_va += ((uint64_t)G_030008_BASE_ADDRESS_HI(rview->resource_words[2])) << 32; 438701e04c3fSmrg prog_va -= rview_va; 438801e04c3fSmrg 438901e04c3fSmrg sbuf[idx].buffer_offset = prog_va & 0xffffffff; 439001e04c3fSmrg sbuf[idx].buffer_size = rview->resource_words[1] + 1;; 439101e04c3fSmrg } else { 439201e04c3fSmrg sbuf[idx].buffer_offset = 0; 439301e04c3fSmrg sbuf[idx].buffer_size = 0; 439401e04c3fSmrg } 439501e04c3fSmrg } 439601e04c3fSmrg} 439701e04c3fSmrg 439801e04c3fSmrgstatic void evergreen_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st) 439901e04c3fSmrg{ 440001e04c3fSmrg struct r600_context *rctx = (struct r600_context *)ctx; 
440101e04c3fSmrg st->saved_compute = rctx->cs_shader_state.shader; 440201e04c3fSmrg 440301e04c3fSmrg /* save constant buffer 0 */ 440401e04c3fSmrg evergreen_get_pipe_constant_buffer(rctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 440501e04c3fSmrg /* save ssbo 0 */ 440601e04c3fSmrg evergreen_get_shader_buffers(rctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); 440701e04c3fSmrg} 440801e04c3fSmrg 440901e04c3fSmrg 4410af69d88dSmrgvoid evergreen_init_state_functions(struct r600_context *rctx) 4411af69d88dSmrg{ 441201e04c3fSmrg unsigned id = 1; 441301e04c3fSmrg unsigned i; 4414af69d88dSmrg /* !!! 441501e04c3fSmrg * To avoid GPU lockup registers must be emitted in a specific order 4416af69d88dSmrg * (no kidding ...). The order below is important and have been 441701e04c3fSmrg * partially inferred from analyzing fglrx command stream. 4418af69d88dSmrg * 4419af69d88dSmrg * Don't reorder atom without carefully checking the effect (GPU lockup 4420af69d88dSmrg * or piglit regression). 4421af69d88dSmrg * !!! 
4422af69d88dSmrg */ 442301e04c3fSmrg if (rctx->b.chip_class == EVERGREEN) { 442401e04c3fSmrg r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11); 442501e04c3fSmrg rctx->config_state.dyn_gpr_enabled = true; 442601e04c3fSmrg } 4427af69d88dSmrg r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0); 442801e04c3fSmrg r600_init_atom(rctx, &rctx->fragment_images.atom, id++, evergreen_emit_fragment_image_state, 0); 442901e04c3fSmrg r600_init_atom(rctx, &rctx->compute_images.atom, id++, evergreen_emit_compute_image_state, 0); 443001e04c3fSmrg r600_init_atom(rctx, &rctx->fragment_buffers.atom, id++, evergreen_emit_fragment_buffer_state, 0); 443101e04c3fSmrg r600_init_atom(rctx, &rctx->compute_buffers.atom, id++, evergreen_emit_compute_buffer_state, 0); 4432af69d88dSmrg /* shader const */ 4433af69d88dSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0); 4434af69d88dSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0); 4435af69d88dSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT].atom, id++, evergreen_emit_ps_constant_buffers, 0); 443601e04c3fSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_CTRL].atom, id++, evergreen_emit_tcs_constant_buffers, 0); 443701e04c3fSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_EVAL].atom, id++, evergreen_emit_tes_constant_buffers, 0); 4438af69d88dSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom, id++, evergreen_emit_cs_constant_buffers, 0); 4439af69d88dSmrg /* shader program */ 4440af69d88dSmrg r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0); 4441af69d88dSmrg /* sampler */ 4442af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0); 4443af69d88dSmrg 
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0); 444401e04c3fSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].states.atom, id++, evergreen_emit_tcs_sampler_states, 0); 444501e04c3fSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].states.atom, id++, evergreen_emit_tes_sampler_states, 0); 4446af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0); 444701e04c3fSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0); 4448af69d88dSmrg /* resources */ 4449af69d88dSmrg r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0); 4450af69d88dSmrg r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0); 4451af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0); 4452af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0); 445301e04c3fSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].views.atom, id++, evergreen_emit_tcs_sampler_views, 0); 445401e04c3fSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views.atom, id++, evergreen_emit_tes_sampler_views, 0); 4455af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0); 445601e04c3fSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0); 4457af69d88dSmrg 445801e04c3fSmrg r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10); 4459af69d88dSmrg 4460af69d88dSmrg if (rctx->b.chip_class == EVERGREEN) { 4461af69d88dSmrg r600_init_atom(rctx, &rctx->sample_mask.atom, id++, evergreen_emit_sample_mask, 3); 
4462af69d88dSmrg } else { 4463af69d88dSmrg r600_init_atom(rctx, &rctx->sample_mask.atom, id++, cayman_emit_sample_mask, 4); 4464af69d88dSmrg } 4465af69d88dSmrg rctx->sample_mask.sample_mask = ~0; 4466af69d88dSmrg 4467af69d88dSmrg r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6); 4468af69d88dSmrg r600_init_atom(rctx, &rctx->blend_color.atom, id++, r600_emit_blend_color, 6); 4469af69d88dSmrg r600_init_atom(rctx, &rctx->blend_state.atom, id++, r600_emit_cso_state, 0); 4470af69d88dSmrg r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 4); 447101e04c3fSmrg r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 9); 4472af69d88dSmrg r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26); 4473af69d88dSmrg r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 10); 4474af69d88dSmrg r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14); 4475af69d88dSmrg r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0); 447601e04c3fSmrg r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 9); 4477af69d88dSmrg r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0); 447801e04c3fSmrg r600_add_atom(rctx, &rctx->b.scissors.atom, id++); 447901e04c3fSmrg r600_add_atom(rctx, &rctx->b.viewports.atom, id++); 4480af69d88dSmrg r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); 4481af69d88dSmrg r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); 448201e04c3fSmrg r600_add_atom(rctx, &rctx->b.render_cond_atom, id++); 448301e04c3fSmrg r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++); 448401e04c3fSmrg r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++); 448501e04c3fSmrg for (i = 0; i < EG_NUM_HW_STAGES; i++) 448601e04c3fSmrg r600_init_atom(rctx, 
&rctx->hw_shader_stages[i].atom, id++, r600_emit_shader, 0); 448701e04c3fSmrg r600_init_atom(rctx, &rctx->shader_stages.atom, id++, evergreen_emit_shader_stages, 15); 4488af69d88dSmrg r600_init_atom(rctx, &rctx->gs_rings.atom, id++, evergreen_emit_gs_rings, 26); 4489af69d88dSmrg 4490af69d88dSmrg rctx->b.b.create_blend_state = evergreen_create_blend_state; 4491af69d88dSmrg rctx->b.b.create_depth_stencil_alpha_state = evergreen_create_dsa_state; 4492af69d88dSmrg rctx->b.b.create_rasterizer_state = evergreen_create_rs_state; 4493af69d88dSmrg rctx->b.b.create_sampler_state = evergreen_create_sampler_state; 4494af69d88dSmrg rctx->b.b.create_sampler_view = evergreen_create_sampler_view; 4495af69d88dSmrg rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state; 4496af69d88dSmrg rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple; 449701e04c3fSmrg rctx->b.b.set_min_samples = evergreen_set_min_samples; 449801e04c3fSmrg rctx->b.b.set_tess_state = evergreen_set_tess_state; 44997ec681f3Smrg rctx->b.b.set_patch_vertices = evergreen_set_patch_vertices; 450001e04c3fSmrg rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers; 450101e04c3fSmrg rctx->b.b.set_shader_images = evergreen_set_shader_images; 450201e04c3fSmrg rctx->b.b.set_shader_buffers = evergreen_set_shader_buffers; 4503af69d88dSmrg if (rctx->b.chip_class == EVERGREEN) 4504af69d88dSmrg rctx->b.b.get_sample_position = evergreen_get_sample_position; 4505af69d88dSmrg else 4506af69d88dSmrg rctx->b.b.get_sample_position = cayman_get_sample_position; 4507af69d88dSmrg rctx->b.dma_copy = evergreen_dma_copy; 450801e04c3fSmrg rctx->b.save_qbo_state = evergreen_save_qbo_state; 4509af69d88dSmrg 4510af69d88dSmrg evergreen_init_compute_state_functions(rctx); 45113464ebd5Sriastradh} 451201e04c3fSmrg 451301e04c3fSmrg/** 451401e04c3fSmrg * This calculates the LDS size for tessellation shaders (VS, TCS, TES). 
451501e04c3fSmrg * 451601e04c3fSmrg * The information about LDS and other non-compile-time parameters is then 451701e04c3fSmrg * written to the const buffer. 451801e04c3fSmrg 451901e04c3fSmrg * const buffer contains - 452001e04c3fSmrg * uint32_t input_patch_size 452101e04c3fSmrg * uint32_t input_vertex_size 452201e04c3fSmrg * uint32_t num_tcs_input_cp 452301e04c3fSmrg * uint32_t num_tcs_output_cp; 452401e04c3fSmrg * uint32_t output_patch_size 452501e04c3fSmrg * uint32_t output_vertex_size 452601e04c3fSmrg * uint32_t output_patch0_offset 452701e04c3fSmrg * uint32_t perpatch_output_offset 452801e04c3fSmrg * and the same constbuf is bound to LS/HS/VS(ES). 452901e04c3fSmrg */ 453001e04c3fSmrgvoid evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches) 453101e04c3fSmrg{ 453201e04c3fSmrg struct pipe_constant_buffer constbuf = {0}; 453301e04c3fSmrg struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader; 453401e04c3fSmrg struct r600_pipe_shader_selector *ls = rctx->vs_shader; 45357ec681f3Smrg unsigned num_tcs_input_cp = rctx->patch_vertices; 453601e04c3fSmrg unsigned num_tcs_outputs; 453701e04c3fSmrg unsigned num_tcs_output_cp; 453801e04c3fSmrg unsigned num_tcs_patch_outputs; 453901e04c3fSmrg unsigned num_tcs_inputs; 454001e04c3fSmrg unsigned input_vertex_size, output_vertex_size; 454101e04c3fSmrg unsigned input_patch_size, pervertex_output_patch_size, output_patch_size; 454201e04c3fSmrg unsigned output_patch0_offset, perpatch_output_offset, lds_size; 454301e04c3fSmrg uint32_t values[8]; 454401e04c3fSmrg unsigned num_waves; 454501e04c3fSmrg unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes; 454601e04c3fSmrg unsigned wave_divisor = (16 * num_pipes); 454701e04c3fSmrg 454801e04c3fSmrg *num_patches = 1; 454901e04c3fSmrg 455001e04c3fSmrg if (!rctx->tes_shader) { 455101e04c3fSmrg rctx->lds_alloc = 0; 455201e04c3fSmrg rctx->b.b.set_constant_buffer(&rctx->b.b, 
PIPE_SHADER_VERTEX, 45537ec681f3Smrg R600_LDS_INFO_CONST_BUFFER, false, NULL); 455401e04c3fSmrg rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL, 45557ec681f3Smrg R600_LDS_INFO_CONST_BUFFER, false, NULL); 455601e04c3fSmrg rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, 45577ec681f3Smrg R600_LDS_INFO_CONST_BUFFER, false, NULL); 455801e04c3fSmrg return; 455901e04c3fSmrg } 456001e04c3fSmrg 456101e04c3fSmrg if (rctx->lds_alloc != 0 && 456201e04c3fSmrg rctx->last_ls == ls && 456301e04c3fSmrg rctx->last_num_tcs_input_cp == num_tcs_input_cp && 456401e04c3fSmrg rctx->last_tcs == tcs) 456501e04c3fSmrg return; 456601e04c3fSmrg 456701e04c3fSmrg num_tcs_inputs = util_last_bit64(ls->lds_outputs_written_mask); 456801e04c3fSmrg 456901e04c3fSmrg if (rctx->tcs_shader) { 457001e04c3fSmrg num_tcs_outputs = util_last_bit64(tcs->lds_outputs_written_mask); 457101e04c3fSmrg num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; 457201e04c3fSmrg num_tcs_patch_outputs = util_last_bit64(tcs->lds_patch_outputs_written_mask); 457301e04c3fSmrg } else { 457401e04c3fSmrg num_tcs_outputs = num_tcs_inputs; 457501e04c3fSmrg num_tcs_output_cp = num_tcs_input_cp; 457601e04c3fSmrg num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */ 457701e04c3fSmrg } 457801e04c3fSmrg 457901e04c3fSmrg /* size in bytes */ 458001e04c3fSmrg input_vertex_size = num_tcs_inputs * 16; 458101e04c3fSmrg output_vertex_size = num_tcs_outputs * 16; 458201e04c3fSmrg 458301e04c3fSmrg input_patch_size = num_tcs_input_cp * input_vertex_size; 458401e04c3fSmrg 458501e04c3fSmrg pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; 458601e04c3fSmrg output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; 458701e04c3fSmrg 458801e04c3fSmrg output_patch0_offset = rctx->tcs_shader ? 
input_patch_size * *num_patches : 0; 458901e04c3fSmrg perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; 459001e04c3fSmrg 459101e04c3fSmrg lds_size = output_patch0_offset + output_patch_size * *num_patches; 459201e04c3fSmrg 459301e04c3fSmrg values[0] = input_patch_size; 459401e04c3fSmrg values[1] = input_vertex_size; 459501e04c3fSmrg values[2] = num_tcs_input_cp; 459601e04c3fSmrg values[3] = num_tcs_output_cp; 459701e04c3fSmrg 459801e04c3fSmrg values[4] = output_patch_size; 459901e04c3fSmrg values[5] = output_vertex_size; 460001e04c3fSmrg values[6] = output_patch0_offset; 460101e04c3fSmrg values[7] = perpatch_output_offset; 460201e04c3fSmrg 460301e04c3fSmrg /* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES * 460401e04c3fSmrg LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */ 460501e04c3fSmrg num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor); 460601e04c3fSmrg 460701e04c3fSmrg rctx->lds_alloc = (lds_size | (num_waves << 14)); 460801e04c3fSmrg 460901e04c3fSmrg rctx->last_ls = ls; 461001e04c3fSmrg rctx->last_tcs = tcs; 461101e04c3fSmrg rctx->last_num_tcs_input_cp = num_tcs_input_cp; 461201e04c3fSmrg 461301e04c3fSmrg constbuf.user_buffer = values; 461401e04c3fSmrg constbuf.buffer_size = 8 * 4; 461501e04c3fSmrg 461601e04c3fSmrg rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, 46177ec681f3Smrg R600_LDS_INFO_CONST_BUFFER, false, &constbuf); 461801e04c3fSmrg rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL, 46197ec681f3Smrg R600_LDS_INFO_CONST_BUFFER, false, &constbuf); 462001e04c3fSmrg rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, 46217ec681f3Smrg R600_LDS_INFO_CONST_BUFFER, true, &constbuf); 462201e04c3fSmrg} 462301e04c3fSmrg 462401e04c3fSmrguint32_t evergreen_get_ls_hs_config(struct r600_context *rctx, 462501e04c3fSmrg const struct pipe_draw_info *info, 462601e04c3fSmrg unsigned num_patches) 462701e04c3fSmrg{ 462801e04c3fSmrg unsigned num_output_cp; 
462901e04c3fSmrg 463001e04c3fSmrg if (!rctx->tes_shader) 463101e04c3fSmrg return 0; 463201e04c3fSmrg 463301e04c3fSmrg num_output_cp = rctx->tcs_shader ? 463401e04c3fSmrg rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 46357ec681f3Smrg rctx->patch_vertices; 463601e04c3fSmrg 463701e04c3fSmrg return S_028B58_NUM_PATCHES(num_patches) | 46387ec681f3Smrg S_028B58_HS_NUM_INPUT_CP(rctx->patch_vertices) | 463901e04c3fSmrg S_028B58_HS_NUM_OUTPUT_CP(num_output_cp); 464001e04c3fSmrg} 464101e04c3fSmrg 464201e04c3fSmrgvoid evergreen_set_ls_hs_config(struct r600_context *rctx, 464301e04c3fSmrg struct radeon_cmdbuf *cs, 464401e04c3fSmrg uint32_t ls_hs_config) 464501e04c3fSmrg{ 464601e04c3fSmrg radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); 464701e04c3fSmrg} 464801e04c3fSmrg 464901e04c3fSmrgvoid evergreen_set_lds_alloc(struct r600_context *rctx, 465001e04c3fSmrg struct radeon_cmdbuf *cs, 465101e04c3fSmrg uint32_t lds_alloc) 465201e04c3fSmrg{ 465301e04c3fSmrg radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc); 465401e04c3fSmrg} 465501e04c3fSmrg 465601e04c3fSmrg/* on evergreen if you are running tessellation you need to disable dynamic 465701e04c3fSmrg GPRs to workaround a hardware bug.*/ 465801e04c3fSmrgbool evergreen_adjust_gprs(struct r600_context *rctx) 465901e04c3fSmrg{ 466001e04c3fSmrg unsigned num_gprs[EG_NUM_HW_STAGES]; 466101e04c3fSmrg unsigned def_gprs[EG_NUM_HW_STAGES]; 466201e04c3fSmrg unsigned cur_gprs[EG_NUM_HW_STAGES]; 466301e04c3fSmrg unsigned new_gprs[EG_NUM_HW_STAGES]; 466401e04c3fSmrg unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; 466501e04c3fSmrg unsigned max_gprs; 466601e04c3fSmrg unsigned i; 466701e04c3fSmrg unsigned total_gprs; 466801e04c3fSmrg unsigned tmp[3]; 466901e04c3fSmrg bool rework = false, set_default = false, set_dirty = false; 467001e04c3fSmrg max_gprs = 0; 467101e04c3fSmrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 467201e04c3fSmrg def_gprs[i] = rctx->default_gprs[i]; 
467301e04c3fSmrg max_gprs += def_gprs[i]; 467401e04c3fSmrg } 467501e04c3fSmrg max_gprs += def_num_clause_temp_gprs * 2; 467601e04c3fSmrg 467701e04c3fSmrg /* if we have no TESS and dyn gpr is enabled then do nothing. */ 467801e04c3fSmrg if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader) { 467901e04c3fSmrg if (rctx->config_state.dyn_gpr_enabled) 468001e04c3fSmrg return true; 468101e04c3fSmrg 468201e04c3fSmrg /* transition back to dyn gpr enabled state */ 468301e04c3fSmrg rctx->config_state.dyn_gpr_enabled = true; 468401e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->config_state.atom); 468501e04c3fSmrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 468601e04c3fSmrg return true; 468701e04c3fSmrg } 468801e04c3fSmrg 468901e04c3fSmrg 469001e04c3fSmrg /* gather required shader gprs */ 469101e04c3fSmrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 469201e04c3fSmrg if (rctx->hw_shader_stages[i].shader) 469301e04c3fSmrg num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr; 469401e04c3fSmrg else 469501e04c3fSmrg num_gprs[i] = 0; 469601e04c3fSmrg } 469701e04c3fSmrg 469801e04c3fSmrg cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); 469901e04c3fSmrg cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); 470001e04c3fSmrg cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); 470101e04c3fSmrg cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); 470201e04c3fSmrg cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3); 470301e04c3fSmrg cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3); 470401e04c3fSmrg 470501e04c3fSmrg total_gprs = 0; 470601e04c3fSmrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 470701e04c3fSmrg new_gprs[i] = num_gprs[i]; 470801e04c3fSmrg total_gprs += num_gprs[i]; 470901e04c3fSmrg } 471001e04c3fSmrg 471101e04c3fSmrg if (total_gprs > 
(max_gprs - (2 * def_num_clause_temp_gprs))) 471201e04c3fSmrg return false; 471301e04c3fSmrg 471401e04c3fSmrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 471501e04c3fSmrg if (new_gprs[i] > cur_gprs[i]) { 471601e04c3fSmrg rework = true; 471701e04c3fSmrg break; 471801e04c3fSmrg } 471901e04c3fSmrg } 472001e04c3fSmrg 472101e04c3fSmrg if (rctx->config_state.dyn_gpr_enabled) { 472201e04c3fSmrg set_dirty = true; 472301e04c3fSmrg rctx->config_state.dyn_gpr_enabled = false; 472401e04c3fSmrg } 472501e04c3fSmrg 472601e04c3fSmrg if (rework) { 472701e04c3fSmrg set_default = true; 472801e04c3fSmrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 472901e04c3fSmrg if (new_gprs[i] > def_gprs[i]) 473001e04c3fSmrg set_default = false; 473101e04c3fSmrg } 473201e04c3fSmrg 473301e04c3fSmrg if (set_default) { 473401e04c3fSmrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 473501e04c3fSmrg new_gprs[i] = def_gprs[i]; 473601e04c3fSmrg } 473701e04c3fSmrg } else { 473801e04c3fSmrg unsigned ps_value = max_gprs; 473901e04c3fSmrg 474001e04c3fSmrg ps_value -= (def_num_clause_temp_gprs * 2); 474101e04c3fSmrg for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++) 474201e04c3fSmrg ps_value -= new_gprs[i]; 474301e04c3fSmrg 474401e04c3fSmrg new_gprs[R600_HW_STAGE_PS] = ps_value; 474501e04c3fSmrg } 474601e04c3fSmrg 474701e04c3fSmrg tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) | 474801e04c3fSmrg S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) | 474901e04c3fSmrg S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); 475001e04c3fSmrg 475101e04c3fSmrg tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) | 475201e04c3fSmrg S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]); 475301e04c3fSmrg 475401e04c3fSmrg tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) | 475501e04c3fSmrg S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]); 475601e04c3fSmrg 475701e04c3fSmrg if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] || 475801e04c3fSmrg rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] || 475901e04c3fSmrg 
rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) { 476001e04c3fSmrg rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0]; 476101e04c3fSmrg rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1]; 476201e04c3fSmrg rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2]; 476301e04c3fSmrg set_dirty = true; 476401e04c3fSmrg } 476501e04c3fSmrg } 476601e04c3fSmrg 476701e04c3fSmrg 476801e04c3fSmrg if (set_dirty) { 476901e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->config_state.atom); 477001e04c3fSmrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 477101e04c3fSmrg } 477201e04c3fSmrg return true; 477301e04c3fSmrg} 477401e04c3fSmrg 477501e04c3fSmrg#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) 477601e04c3fSmrg 477701e04c3fSmrgvoid eg_trace_emit(struct r600_context *rctx) 477801e04c3fSmrg{ 47797ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 478001e04c3fSmrg unsigned reloc; 478101e04c3fSmrg 478201e04c3fSmrg if (rctx->b.chip_class < EVERGREEN) 478301e04c3fSmrg return; 478401e04c3fSmrg 478501e04c3fSmrg /* This must be done after r600_need_cs_space. 
*/ 478601e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 478701e04c3fSmrg (struct r600_resource*)rctx->trace_buf, RADEON_USAGE_WRITE, 478801e04c3fSmrg RADEON_PRIO_CP_DMA); 478901e04c3fSmrg 479001e04c3fSmrg rctx->trace_id++; 479101e04c3fSmrg radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rctx->trace_buf, 479201e04c3fSmrg RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); 479301e04c3fSmrg radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); 479401e04c3fSmrg radeon_emit(cs, rctx->trace_buf->gpu_address); 479501e04c3fSmrg radeon_emit(cs, rctx->trace_buf->gpu_address >> 32 | MEM_WRITE_32_BITS | MEM_WRITE_CONFIRM); 479601e04c3fSmrg radeon_emit(cs, rctx->trace_id); 479701e04c3fSmrg radeon_emit(cs, 0); 479801e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 479901e04c3fSmrg radeon_emit(cs, reloc); 480001e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 480101e04c3fSmrg radeon_emit(cs, AC_ENCODE_TRACE_POINT(rctx->trace_id)); 480201e04c3fSmrg} 480301e04c3fSmrg 480401e04c3fSmrgstatic void evergreen_emit_set_append_cnt(struct r600_context *rctx, 480501e04c3fSmrg struct r600_shader_atomic *atomic, 480601e04c3fSmrg struct r600_resource *resource, 480701e04c3fSmrg uint32_t pkt_flags) 480801e04c3fSmrg{ 48097ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 481001e04c3fSmrg unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 481101e04c3fSmrg resource, 481201e04c3fSmrg RADEON_USAGE_READ, 481301e04c3fSmrg RADEON_PRIO_SHADER_RW_BUFFER); 481401e04c3fSmrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 481501e04c3fSmrg uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0; 481601e04c3fSmrg 481701e04c3fSmrg uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4 - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; 481801e04c3fSmrg 481901e04c3fSmrg radeon_emit(cs, PKT3(PKT3_SET_APPEND_CNT, 2, 0) | pkt_flags); 482001e04c3fSmrg radeon_emit(cs, (reg_val << 16) | 0x3); 482101e04c3fSmrg radeon_emit(cs, dst_offset & 0xfffffffc); 482201e04c3fSmrg radeon_emit(cs, 
(dst_offset >> 32) & 0xff); 482301e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 482401e04c3fSmrg radeon_emit(cs, reloc); 482501e04c3fSmrg} 482601e04c3fSmrg 482701e04c3fSmrgstatic void evergreen_emit_event_write_eos(struct r600_context *rctx, 482801e04c3fSmrg struct r600_shader_atomic *atomic, 482901e04c3fSmrg struct r600_resource *resource, 483001e04c3fSmrg uint32_t pkt_flags) 483101e04c3fSmrg{ 48327ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 483301e04c3fSmrg uint32_t event = EVENT_TYPE_PS_DONE; 483401e04c3fSmrg uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0; 483501e04c3fSmrg uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 483601e04c3fSmrg resource, 483701e04c3fSmrg RADEON_USAGE_WRITE, 483801e04c3fSmrg RADEON_PRIO_SHADER_RW_BUFFER); 483901e04c3fSmrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 484001e04c3fSmrg uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4) >> 2; 484101e04c3fSmrg 484201e04c3fSmrg if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) 484301e04c3fSmrg event = EVENT_TYPE_CS_DONE; 484401e04c3fSmrg 484501e04c3fSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); 484601e04c3fSmrg radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); 484701e04c3fSmrg radeon_emit(cs, (dst_offset) & 0xffffffff); 484801e04c3fSmrg radeon_emit(cs, (0 << 29) | ((dst_offset >> 32) & 0xff)); 484901e04c3fSmrg radeon_emit(cs, reg_val); 485001e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 485101e04c3fSmrg radeon_emit(cs, reloc); 485201e04c3fSmrg} 485301e04c3fSmrg 485401e04c3fSmrgstatic void cayman_emit_event_write_eos(struct r600_context *rctx, 485501e04c3fSmrg struct r600_shader_atomic *atomic, 485601e04c3fSmrg struct r600_resource *resource, 485701e04c3fSmrg uint32_t pkt_flags) 485801e04c3fSmrg{ 48597ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 486001e04c3fSmrg uint32_t event = EVENT_TYPE_PS_DONE; 486101e04c3fSmrg uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 
486201e04c3fSmrg resource, 486301e04c3fSmrg RADEON_USAGE_WRITE, 486401e04c3fSmrg RADEON_PRIO_SHADER_RW_BUFFER); 486501e04c3fSmrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 486601e04c3fSmrg 486701e04c3fSmrg if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) 486801e04c3fSmrg event = EVENT_TYPE_CS_DONE; 486901e04c3fSmrg 487001e04c3fSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); 487101e04c3fSmrg radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); 487201e04c3fSmrg radeon_emit(cs, (dst_offset) & 0xffffffff); 487301e04c3fSmrg radeon_emit(cs, (1 << 29) | ((dst_offset >> 32) & 0xff)); 487401e04c3fSmrg radeon_emit(cs, (atomic->hw_idx) | (1 << 16)); 487501e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 487601e04c3fSmrg radeon_emit(cs, reloc); 487701e04c3fSmrg} 487801e04c3fSmrg 487901e04c3fSmrg/* writes count from a buffer into GDS */ 488001e04c3fSmrgstatic void cayman_write_count_to_gds(struct r600_context *rctx, 488101e04c3fSmrg struct r600_shader_atomic *atomic, 488201e04c3fSmrg struct r600_resource *resource, 488301e04c3fSmrg uint32_t pkt_flags) 488401e04c3fSmrg{ 48857ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 488601e04c3fSmrg unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 488701e04c3fSmrg resource, 488801e04c3fSmrg RADEON_USAGE_READ, 488901e04c3fSmrg RADEON_PRIO_SHADER_RW_BUFFER); 489001e04c3fSmrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 489101e04c3fSmrg 489201e04c3fSmrg radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags); 489301e04c3fSmrg radeon_emit(cs, dst_offset & 0xffffffff); 489401e04c3fSmrg radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_DST_SEL(1) | ((dst_offset >> 32) & 0xff));// GDS 489501e04c3fSmrg radeon_emit(cs, atomic->hw_idx * 4); 489601e04c3fSmrg radeon_emit(cs, 0); 489701e04c3fSmrg radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | 4); 489801e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 489901e04c3fSmrg radeon_emit(cs, reloc); 490001e04c3fSmrg} 
490101e04c3fSmrg 490201e04c3fSmrgvoid evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, 490301e04c3fSmrg struct r600_pipe_shader *cs_shader, 490401e04c3fSmrg struct r600_shader_atomic *combined_atomics, 490501e04c3fSmrg uint8_t *atomic_used_mask_p) 490601e04c3fSmrg{ 490701e04c3fSmrg uint8_t atomic_used_mask = 0; 490801e04c3fSmrg int i, j, k; 490901e04c3fSmrg bool is_compute = cs_shader ? true : false; 491001e04c3fSmrg 491101e04c3fSmrg for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) { 491201e04c3fSmrg uint8_t num_atomic_stage; 491301e04c3fSmrg struct r600_pipe_shader *pshader; 491401e04c3fSmrg 491501e04c3fSmrg if (is_compute) 491601e04c3fSmrg pshader = cs_shader; 491701e04c3fSmrg else 491801e04c3fSmrg pshader = rctx->hw_shader_stages[i].shader; 491901e04c3fSmrg if (!pshader) 492001e04c3fSmrg continue; 492101e04c3fSmrg 492201e04c3fSmrg num_atomic_stage = pshader->shader.nhwatomic_ranges; 492301e04c3fSmrg if (!num_atomic_stage) 492401e04c3fSmrg continue; 492501e04c3fSmrg 492601e04c3fSmrg for (j = 0; j < num_atomic_stage; j++) { 492701e04c3fSmrg struct r600_shader_atomic *atomic = &pshader->shader.atomics[j]; 492801e04c3fSmrg int natomics = atomic->end - atomic->start + 1; 492901e04c3fSmrg 493001e04c3fSmrg for (k = 0; k < natomics; k++) { 493101e04c3fSmrg /* seen this in a previous stage */ 493201e04c3fSmrg if (atomic_used_mask & (1u << (atomic->hw_idx + k))) 493301e04c3fSmrg continue; 493401e04c3fSmrg 493501e04c3fSmrg combined_atomics[atomic->hw_idx + k].hw_idx = atomic->hw_idx + k; 493601e04c3fSmrg combined_atomics[atomic->hw_idx + k].buffer_id = atomic->buffer_id; 493701e04c3fSmrg combined_atomics[atomic->hw_idx + k].start = atomic->start + k; 493801e04c3fSmrg combined_atomics[atomic->hw_idx + k].end = combined_atomics[atomic->hw_idx + k].start + 1; 493901e04c3fSmrg atomic_used_mask |= (1u << (atomic->hw_idx + k)); 494001e04c3fSmrg } 494101e04c3fSmrg } 494201e04c3fSmrg } 494301e04c3fSmrg *atomic_used_mask_p = atomic_used_mask; 
494401e04c3fSmrg} 494501e04c3fSmrg 494601e04c3fSmrgvoid evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, 494701e04c3fSmrg bool is_compute, 494801e04c3fSmrg struct r600_shader_atomic *combined_atomics, 494901e04c3fSmrg uint8_t atomic_used_mask) 495001e04c3fSmrg{ 495101e04c3fSmrg struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; 495201e04c3fSmrg unsigned pkt_flags = 0; 495301e04c3fSmrg uint32_t mask; 495401e04c3fSmrg 495501e04c3fSmrg if (is_compute) 495601e04c3fSmrg pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; 495701e04c3fSmrg 495801e04c3fSmrg mask = atomic_used_mask; 495901e04c3fSmrg if (!mask) 496001e04c3fSmrg return; 496101e04c3fSmrg 496201e04c3fSmrg while (mask) { 496301e04c3fSmrg unsigned atomic_index = u_bit_scan(&mask); 496401e04c3fSmrg struct r600_shader_atomic *atomic = &combined_atomics[atomic_index]; 496501e04c3fSmrg struct r600_resource *resource = r600_resource(astate->buffer[atomic->buffer_id].buffer); 496601e04c3fSmrg assert(resource); 496701e04c3fSmrg 496801e04c3fSmrg if (rctx->b.chip_class == CAYMAN) 496901e04c3fSmrg cayman_write_count_to_gds(rctx, atomic, resource, pkt_flags); 497001e04c3fSmrg else 497101e04c3fSmrg evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags); 497201e04c3fSmrg } 497301e04c3fSmrg} 497401e04c3fSmrg 497501e04c3fSmrgvoid evergreen_emit_atomic_buffer_save(struct r600_context *rctx, 497601e04c3fSmrg bool is_compute, 497701e04c3fSmrg struct r600_shader_atomic *combined_atomics, 497801e04c3fSmrg uint8_t *atomic_used_mask_p) 497901e04c3fSmrg{ 49807ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 498101e04c3fSmrg struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; 498201e04c3fSmrg uint32_t pkt_flags = 0; 498301e04c3fSmrg uint32_t event = EVENT_TYPE_PS_DONE; 498401e04c3fSmrg uint32_t mask; 498501e04c3fSmrg uint64_t dst_offset; 498601e04c3fSmrg unsigned reloc; 498701e04c3fSmrg 498801e04c3fSmrg if (is_compute) 498901e04c3fSmrg pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; 
499001e04c3fSmrg 499101e04c3fSmrg mask = *atomic_used_mask_p; 499201e04c3fSmrg if (!mask) 499301e04c3fSmrg return; 499401e04c3fSmrg 499501e04c3fSmrg while (mask) { 499601e04c3fSmrg unsigned atomic_index = u_bit_scan(&mask); 499701e04c3fSmrg struct r600_shader_atomic *atomic = &combined_atomics[atomic_index]; 499801e04c3fSmrg struct r600_resource *resource = r600_resource(astate->buffer[atomic->buffer_id].buffer); 499901e04c3fSmrg assert(resource); 500001e04c3fSmrg 500101e04c3fSmrg if (rctx->b.chip_class == CAYMAN) 500201e04c3fSmrg cayman_emit_event_write_eos(rctx, atomic, resource, pkt_flags); 500301e04c3fSmrg else 500401e04c3fSmrg evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_flags); 500501e04c3fSmrg } 500601e04c3fSmrg 500701e04c3fSmrg if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) 500801e04c3fSmrg event = EVENT_TYPE_CS_DONE; 500901e04c3fSmrg 501001e04c3fSmrg ++rctx->append_fence_id; 501101e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 501201e04c3fSmrg r600_resource(rctx->append_fence), 501301e04c3fSmrg RADEON_USAGE_READWRITE, 501401e04c3fSmrg RADEON_PRIO_SHADER_RW_BUFFER); 501501e04c3fSmrg dst_offset = r600_resource(rctx->append_fence)->gpu_address; 501601e04c3fSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); 501701e04c3fSmrg radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); 501801e04c3fSmrg radeon_emit(cs, dst_offset & 0xffffffff); 501901e04c3fSmrg radeon_emit(cs, (2 << 29) | ((dst_offset >> 32) & 0xff)); 502001e04c3fSmrg radeon_emit(cs, rctx->append_fence_id); 502101e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 502201e04c3fSmrg radeon_emit(cs, reloc); 502301e04c3fSmrg 502401e04c3fSmrg radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0) | pkt_flags); 502501e04c3fSmrg radeon_emit(cs, WAIT_REG_MEM_GEQUAL | WAIT_REG_MEM_MEMORY | (1 << 8)); 502601e04c3fSmrg radeon_emit(cs, dst_offset & 0xffffffff); 502701e04c3fSmrg radeon_emit(cs, ((dst_offset >> 32) & 0xff)); 502801e04c3fSmrg radeon_emit(cs, 
rctx->append_fence_id); 502901e04c3fSmrg radeon_emit(cs, 0xffffffff); 503001e04c3fSmrg radeon_emit(cs, 0xa); 503101e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 503201e04c3fSmrg radeon_emit(cs, reloc); 503301e04c3fSmrg} 5034