1848b8605Smrg/* 2848b8605Smrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3848b8605Smrg * 4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5848b8605Smrg * copy of this software and associated documentation files (the "Software"), 6848b8605Smrg * to deal in the Software without restriction, including without limitation 7848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 8848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom 9848b8605Smrg * the Software is furnished to do so, subject to the following conditions: 10848b8605Smrg * 11848b8605Smrg * The above copyright notice and this permission notice (including the next 12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 13848b8605Smrg * Software. 14848b8605Smrg * 15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22848b8605Smrg */ 23848b8605Smrg#include "r600_formats.h" 24848b8605Smrg#include "r600_shader.h" 25b8e80941Smrg#include "r600_query.h" 26848b8605Smrg#include "evergreend.h" 27848b8605Smrg 28848b8605Smrg#include "pipe/p_shader_tokens.h" 29848b8605Smrg#include "util/u_pack_color.h" 30848b8605Smrg#include "util/u_memory.h" 31848b8605Smrg#include "util/u_framebuffer.h" 32848b8605Smrg#include "util/u_dual_blend.h" 33848b8605Smrg#include "evergreen_compute.h" 34848b8605Smrg#include "util/u_math.h" 35848b8605Smrg 36b8e80941Smrgstatic inline unsigned evergreen_array_mode(unsigned mode) 37848b8605Smrg{ 38848b8605Smrg switch (mode) { 39b8e80941Smrg default: 40848b8605Smrg case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED; 41848b8605Smrg break; 42848b8605Smrg case RADEON_SURF_MODE_1D: return V_028C70_ARRAY_1D_TILED_THIN1; 43848b8605Smrg break; 44848b8605Smrg case RADEON_SURF_MODE_2D: return V_028C70_ARRAY_2D_TILED_THIN1; 45848b8605Smrg } 46848b8605Smrg} 47848b8605Smrg 48848b8605Smrgstatic uint32_t eg_num_banks(uint32_t nbanks) 49848b8605Smrg{ 50848b8605Smrg switch (nbanks) { 51848b8605Smrg case 2: 52848b8605Smrg return 0; 53848b8605Smrg case 4: 54848b8605Smrg return 1; 55848b8605Smrg case 8: 56848b8605Smrg default: 57848b8605Smrg return 2; 58848b8605Smrg case 16: 59848b8605Smrg return 3; 60848b8605Smrg } 61848b8605Smrg} 62848b8605Smrg 63848b8605Smrg 64848b8605Smrgstatic unsigned eg_tile_split(unsigned tile_split) 65848b8605Smrg{ 66848b8605Smrg switch (tile_split) { 67848b8605Smrg case 64: tile_split = 0; break; 68848b8605Smrg case 128: tile_split = 1; break; 69848b8605Smrg case 256: tile_split = 2; break; 70848b8605Smrg case 512: tile_split = 3; break; 71848b8605Smrg default: 72848b8605Smrg case 1024: tile_split = 4; break; 73848b8605Smrg case 2048: tile_split = 5; break; 74848b8605Smrg case 4096: tile_split = 6; break; 75848b8605Smrg } 76848b8605Smrg return tile_split; 77848b8605Smrg} 78848b8605Smrg 79848b8605Smrgstatic unsigned 
eg_macro_tile_aspect(unsigned macro_tile_aspect) 80848b8605Smrg{ 81848b8605Smrg switch (macro_tile_aspect) { 82848b8605Smrg default: 83848b8605Smrg case 1: macro_tile_aspect = 0; break; 84848b8605Smrg case 2: macro_tile_aspect = 1; break; 85848b8605Smrg case 4: macro_tile_aspect = 2; break; 86848b8605Smrg case 8: macro_tile_aspect = 3; break; 87848b8605Smrg } 88848b8605Smrg return macro_tile_aspect; 89848b8605Smrg} 90848b8605Smrg 91848b8605Smrgstatic unsigned eg_bank_wh(unsigned bankwh) 92848b8605Smrg{ 93848b8605Smrg switch (bankwh) { 94848b8605Smrg default: 95848b8605Smrg case 1: bankwh = 0; break; 96848b8605Smrg case 2: bankwh = 1; break; 97848b8605Smrg case 4: bankwh = 2; break; 98848b8605Smrg case 8: bankwh = 3; break; 99848b8605Smrg } 100848b8605Smrg return bankwh; 101848b8605Smrg} 102848b8605Smrg 103848b8605Smrgstatic uint32_t r600_translate_blend_function(int blend_func) 104848b8605Smrg{ 105848b8605Smrg switch (blend_func) { 106848b8605Smrg case PIPE_BLEND_ADD: 107848b8605Smrg return V_028780_COMB_DST_PLUS_SRC; 108848b8605Smrg case PIPE_BLEND_SUBTRACT: 109848b8605Smrg return V_028780_COMB_SRC_MINUS_DST; 110848b8605Smrg case PIPE_BLEND_REVERSE_SUBTRACT: 111848b8605Smrg return V_028780_COMB_DST_MINUS_SRC; 112848b8605Smrg case PIPE_BLEND_MIN: 113848b8605Smrg return V_028780_COMB_MIN_DST_SRC; 114848b8605Smrg case PIPE_BLEND_MAX: 115848b8605Smrg return V_028780_COMB_MAX_DST_SRC; 116848b8605Smrg default: 117848b8605Smrg R600_ERR("Unknown blend function %d\n", blend_func); 118848b8605Smrg assert(0); 119848b8605Smrg break; 120848b8605Smrg } 121848b8605Smrg return 0; 122848b8605Smrg} 123848b8605Smrg 124848b8605Smrgstatic uint32_t r600_translate_blend_factor(int blend_fact) 125848b8605Smrg{ 126848b8605Smrg switch (blend_fact) { 127848b8605Smrg case PIPE_BLENDFACTOR_ONE: 128848b8605Smrg return V_028780_BLEND_ONE; 129848b8605Smrg case PIPE_BLENDFACTOR_SRC_COLOR: 130848b8605Smrg return V_028780_BLEND_SRC_COLOR; 131848b8605Smrg case PIPE_BLENDFACTOR_SRC_ALPHA: 
132848b8605Smrg return V_028780_BLEND_SRC_ALPHA; 133848b8605Smrg case PIPE_BLENDFACTOR_DST_ALPHA: 134848b8605Smrg return V_028780_BLEND_DST_ALPHA; 135848b8605Smrg case PIPE_BLENDFACTOR_DST_COLOR: 136848b8605Smrg return V_028780_BLEND_DST_COLOR; 137848b8605Smrg case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 138848b8605Smrg return V_028780_BLEND_SRC_ALPHA_SATURATE; 139848b8605Smrg case PIPE_BLENDFACTOR_CONST_COLOR: 140848b8605Smrg return V_028780_BLEND_CONST_COLOR; 141848b8605Smrg case PIPE_BLENDFACTOR_CONST_ALPHA: 142848b8605Smrg return V_028780_BLEND_CONST_ALPHA; 143848b8605Smrg case PIPE_BLENDFACTOR_ZERO: 144848b8605Smrg return V_028780_BLEND_ZERO; 145848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC_COLOR: 146848b8605Smrg return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 147848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 148848b8605Smrg return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 149848b8605Smrg case PIPE_BLENDFACTOR_INV_DST_ALPHA: 150848b8605Smrg return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 151848b8605Smrg case PIPE_BLENDFACTOR_INV_DST_COLOR: 152848b8605Smrg return V_028780_BLEND_ONE_MINUS_DST_COLOR; 153848b8605Smrg case PIPE_BLENDFACTOR_INV_CONST_COLOR: 154848b8605Smrg return V_028780_BLEND_ONE_MINUS_CONST_COLOR; 155848b8605Smrg case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 156848b8605Smrg return V_028780_BLEND_ONE_MINUS_CONST_ALPHA; 157848b8605Smrg case PIPE_BLENDFACTOR_SRC1_COLOR: 158848b8605Smrg return V_028780_BLEND_SRC1_COLOR; 159848b8605Smrg case PIPE_BLENDFACTOR_SRC1_ALPHA: 160848b8605Smrg return V_028780_BLEND_SRC1_ALPHA; 161848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 162848b8605Smrg return V_028780_BLEND_INV_SRC1_COLOR; 163848b8605Smrg case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 164848b8605Smrg return V_028780_BLEND_INV_SRC1_ALPHA; 165848b8605Smrg default: 166848b8605Smrg R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 167848b8605Smrg assert(0); 168848b8605Smrg break; 169848b8605Smrg } 170848b8605Smrg return 0; 171848b8605Smrg} 172848b8605Smrg 
/* Return the SQ_TEX_DIM_* texture dimension for a sampler view, taking
 * both the resource's own target and the view's target into account.
 * Cube views force cube handling; cubemap resources viewed as anything
 * else are treated as 2D arrays. */
static unsigned r600_tex_dim(struct r600_texture *rtex,
                             unsigned view_target, unsigned nr_samples)
{
    unsigned res_target = rtex->resource.b.b.target;

    if (view_target == PIPE_TEXTURE_CUBE ||
        view_target == PIPE_TEXTURE_CUBE_ARRAY)
        res_target = view_target;
    /* If interpreting cubemaps as something else, set 2D_ARRAY. */
    else if (res_target == PIPE_TEXTURE_CUBE ||
             res_target == PIPE_TEXTURE_CUBE_ARRAY)
        res_target = PIPE_TEXTURE_2D_ARRAY;

    switch (res_target) {
    default:
    case PIPE_TEXTURE_1D:
        return V_030000_SQ_TEX_DIM_1D;
    case PIPE_TEXTURE_1D_ARRAY:
        return V_030000_SQ_TEX_DIM_1D_ARRAY;
    case PIPE_TEXTURE_2D:
    case PIPE_TEXTURE_RECT:
        return nr_samples > 1 ? V_030000_SQ_TEX_DIM_2D_MSAA :
                    V_030000_SQ_TEX_DIM_2D;
    case PIPE_TEXTURE_2D_ARRAY:
        return nr_samples > 1 ? V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA :
                    V_030000_SQ_TEX_DIM_2D_ARRAY;
    case PIPE_TEXTURE_3D:
        return V_030000_SQ_TEX_DIM_3D;
    case PIPE_TEXTURE_CUBE:
    case PIPE_TEXTURE_CUBE_ARRAY:
        return V_030000_SQ_TEX_DIM_CUBEMAP;
    }
}

/* Translate a pipe depth/stencil format into the DB Z format field.
 * Returns ~0U for formats the depth buffer cannot store directly. */
static uint32_t r600_translate_dbformat(enum pipe_format format)
{
    switch (format) {
    case PIPE_FORMAT_Z16_UNORM:
        return V_028040_Z_16;
    case PIPE_FORMAT_Z24X8_UNORM:
    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
    case PIPE_FORMAT_X8Z24_UNORM:
    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
        return V_028040_Z_24;
    case PIPE_FORMAT_Z32_FLOAT:
    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
        return V_028040_Z_32_FLOAT;
    default:
        return ~0U;
    }
}

/* A format is samplable iff the texture-format translator knows it. */
static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
    return r600_translate_texformat(screen, format, NULL, NULL, NULL,
                    FALSE) != ~0U;
}

/* A format is renderable iff both its color format and its component
 * swizzle have hardware encodings. */
static bool r600_is_colorbuffer_format_supported(enum chip_class chip, enum pipe_format format)
{
    return r600_translate_colorformat(chip, format, FALSE) != ~0U &&
        r600_translate_colorswap(format, FALSE) != ~0U;
}

/* A format can back a depth/stencil attachment iff the DB translator
 * knows it. */
static bool r600_is_zs_format_supported(enum pipe_format format)
{
    return r600_translate_dbformat(format) != ~0U;
}

/* pipe_screen::is_format_supported for evergreen.
 * Accumulates the supported subset of the requested usage bits into
 * `retval` and succeeds only when every requested bit is supported. */
boolean evergreen_is_format_supported(struct pipe_screen *screen,
                      enum pipe_format format,
                      enum pipe_texture_target target,
                      unsigned sample_count,
                      unsigned storage_sample_count,
                      unsigned usage)
{
    struct r600_screen *rscreen = (struct r600_screen*)screen;
    unsigned retval = 0;

    if (target >= PIPE_MAX_TEXTURE_TYPES) {
        R600_ERR("r600: unsupported texture type %d\n", target);
        return FALSE;
    }

    /* No support for storage sample counts differing from the color
     * sample count (EQAA-style layouts). */
    if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
        return false;

    if (sample_count > 1) {
        if (!rscreen->has_msaa)
            return FALSE;

        /* Only 2x/4x/8x MSAA is supported. */
        switch (sample_count) {
        case 2:
        case 4:
        case 8:
            break;
        default:
            return FALSE;
        }
    }

    if (usage & PIPE_BIND_SAMPLER_VIEW) {
        /* Buffer sampler views use the vertex-fetch format table. */
        if (target == PIPE_BUFFER) {
            if (r600_is_vertex_format_supported(format))
                retval |= PIPE_BIND_SAMPLER_VIEW;
        } else {
            if (r600_is_sampler_format_supported(screen, format))
                retval |= PIPE_BIND_SAMPLER_VIEW;
        }
    }

    if ((usage & (PIPE_BIND_RENDER_TARGET |
              PIPE_BIND_DISPLAY_TARGET |
              PIPE_BIND_SCANOUT |
              PIPE_BIND_SHARED |
              PIPE_BIND_BLENDABLE)) &&
        r600_is_colorbuffer_format_supported(rscreen->b.chip_class, format)) {
        retval |= usage &
              (PIPE_BIND_RENDER_TARGET |
               PIPE_BIND_DISPLAY_TARGET |
               PIPE_BIND_SCANOUT |
               PIPE_BIND_SHARED);
        /* Blending is not reported for pure-integer or Z/S formats. */
        if (!util_format_is_pure_integer(format) &&
            !util_format_is_depth_or_stencil(format))
            retval |= usage & PIPE_BIND_BLENDABLE;
    }

    if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
        r600_is_zs_format_supported(format)) {
        retval |= PIPE_BIND_DEPTH_STENCIL;
    }

    if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
        r600_is_vertex_format_supported(format)) {
        retval |= PIPE_BIND_VERTEX_BUFFER;
    }

    /* Linear layout is allowed except for compressed and Z/S usage. */
    if ((usage & PIPE_BIND_LINEAR) &&
        !util_format_is_compressed(format) &&
        !(usage & PIPE_BIND_DEPTH_STENCIL))
        retval |= PIPE_BIND_LINEAR;

    return retval == usage;
}

/* Build a blend CSO for the given CB mode (normal, resolve, ...).
 * Two command buffers are filled: `buffer` with blending as requested
 * and `buffer_no_blend` with all CB_BLENDi_CONTROL words zeroed. */
static void *evergreen_create_blend_state_mode(struct pipe_context *ctx,
                        const struct pipe_blend_state *state, int mode)
{
    uint32_t color_control = 0, target_mask = 0;
    struct r600_blend_state *blend = CALLOC_STRUCT(r600_blend_state);

    if (!blend) {
        return NULL;
    }

    r600_init_command_buffer(&blend->buffer, 20);
    r600_init_command_buffer(&blend->buffer_no_blend, 20);

    if (state->logicop_enable) {
        color_control |= (state->logicop_func << 16) | (state->logicop_func << 20);
    } else {
        /* 0xcc is the COPY logic op (src), i.e. logicop disabled. */
        color_control |= (0xcc << 16);
    }
    /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
    if (state->independent_blend_enable) {
        for (int i = 0; i < 8; i++) {
            target_mask |= (state->rt[i].colormask << (4 * i));
        }
    } else {
        /* Replicate RT0's colormask to all 8 targets. */
        for (int i = 0; i < 8; i++) {
            target_mask |= (state->rt[0].colormask << (4 * i));
        }
    }

    /* only have dual source on MRT0 */
    blend->dual_src_blend = util_blend_state_is_dual(state, 0);
    blend->cb_target_mask = target_mask;
    blend->alpha_to_one = state->alpha_to_one;

    /* An all-zero write mask disables the CB entirely. */
    if (target_mask)
        color_control |= S_028808_MODE(mode);
    else
        color_control |= S_028808_MODE(V_028808_CB_DISABLE);


    r600_store_context_reg(&blend->buffer, R_028808_CB_COLOR_CONTROL, color_control);
    r600_store_context_reg(&blend->buffer, R_028B70_DB_ALPHA_TO_MASK,
                   S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
                   S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
                   S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
                   S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
                   S_028B70_ALPHA_TO_MASK_OFFSET3(2));
    r600_store_context_reg_seq(&blend->buffer, R_028780_CB_BLEND0_CONTROL, 8);

    /* Copy over the dwords set so far into buffer_no_blend.
     * Only the CB_BLENDi_CONTROL registers must be set after this. */
    memcpy(blend->buffer_no_blend.buf, blend->buffer.buf, blend->buffer.num_dw * 4);
    blend->buffer_no_blend.num_dw = blend->buffer.num_dw;

    for (int i = 0; i < 8; i++) {
        /* state->rt entries > 0 only written if independent blending */
        const int j = state->independent_blend_enable ? i : 0;

        unsigned eqRGB = state->rt[j].rgb_func;
        unsigned srcRGB = state->rt[j].rgb_src_factor;
        unsigned dstRGB = state->rt[j].rgb_dst_factor;
        unsigned eqA = state->rt[j].alpha_func;
        unsigned srcA = state->rt[j].alpha_src_factor;
        unsigned dstA = state->rt[j].alpha_dst_factor;
        uint32_t bc = 0;

        /* The no-blend buffer always gets a zeroed control word. */
        r600_store_value(&blend->buffer_no_blend, 0);

        if (!state->rt[j].blend_enable) {
            r600_store_value(&blend->buffer, 0);
            continue;
        }

        bc |= S_028780_BLEND_CONTROL_ENABLE(1);
        bc |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB));
        bc |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB));
        bc |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB));

        /* Program separate alpha blending only when it differs from
         * the RGB equation/factors. */
        if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
            bc |= S_028780_SEPARATE_ALPHA_BLEND(1);
            bc |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA));
            bc |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA));
            bc |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
        }
        r600_store_value(&blend->buffer, bc);
    }
    return blend;
}

/* pipe_context::create_blend_state — normal CB mode. */
static void *evergreen_create_blend_state(struct pipe_context *ctx,
                    const struct pipe_blend_state *state)
{

    return evergreen_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
}

/* pipe_context::create_depth_stencil_alpha_state.
 * Packs depth/stencil state into DB_DEPTH_CONTROL and stashes the
 * alpha-test and mask fields for later emission. */
static void *evergreen_create_dsa_state(struct pipe_context *ctx,
                   const struct pipe_depth_stencil_alpha_state *state)
{
    unsigned db_depth_control, alpha_test_control, alpha_ref;
    struct r600_dsa_state *dsa = CALLOC_STRUCT(r600_dsa_state);

    if (!dsa) {
        return NULL;
    }

    r600_init_command_buffer(&dsa->buffer, 3);

    dsa->valuemask[0] = state->stencil[0].valuemask;
    dsa->valuemask[1] = state->stencil[1].valuemask;
    dsa->writemask[0] = state->stencil[0].writemask;
    dsa->writemask[1] = state->stencil[1].writemask;
    dsa->zwritemask = state->depth.writemask;

    db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
        S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
        S_028800_ZFUNC(state->depth.func);

    /* stencil */
    if (state->stencil[0].enabled) {
        db_depth_control |= S_028800_STENCIL_ENABLE(1);
        db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); /* translates straight */
        db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op));
        db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op));
        db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op));

        /* Back-face stencil only matters when front-face stencil is on. */
        if (state->stencil[1].enabled) {
            db_depth_control |= S_028800_BACKFACE_ENABLE(1);
            db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); /* translates straight */
            db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op));
            db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op));
            db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op));
        }
    }

    /* alpha */
    alpha_test_control = 0;
    alpha_ref = 0;
    if (state->alpha.enabled) {
        alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
        alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
        alpha_ref = fui(state->alpha.ref_value);
    }
    dsa->sx_alpha_test_control = alpha_test_control & 0xff;
    dsa->alpha_ref = alpha_ref;

    /* misc */
    r600_store_context_reg(&dsa->buffer, R_028800_DB_DEPTH_CONTROL, db_depth_control);
    return dsa;
}

/* pipe_context::create_rasterizer_state.
 * Builds the PA (primitive assembly / setup) register set: point size,
 * line stipple, culling, polygon offset, clipping and sprite coords. */
static void *evergreen_create_rs_state(struct pipe_context *ctx,
                    const struct pipe_rasterizer_state *state)
{
    struct r600_context *rctx = (struct r600_context *)ctx;
    unsigned tmp, spi_interp;
    float psize_min, psize_max;
    struct r600_rasterizer_state *rs = CALLOC_STRUCT(r600_rasterizer_state);

    if (!rs) {
        return NULL;
    }

    r600_init_command_buffer(&rs->buffer, 30);

    rs->scissor_enable = state->scissor;
    rs->clip_halfz = state->clip_halfz;
    rs->flatshade = state->flatshade;
    rs->sprite_coord_enable = state->sprite_coord_enable;
    rs->rasterizer_discard = state->rasterizer_discard;
    rs->two_side = state->light_twoside;
    rs->clip_plane_enable = state->clip_plane_enable;
    rs->pa_sc_line_stipple = state->line_stipple_enable ?
                S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
                S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
    rs->pa_cl_clip_cntl =
        S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
        S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |
        S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |
        S_028810_DX_LINEAR_ATTR_CLIP_ENA(1) |
        S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard);
    rs->multisample_enable = state->multisample;

    /* offset */
    rs->offset_units = state->offset_units;
    /* NOTE(review): scale factor 16 presumably converts to the hardware's
     * fixed-point depth-offset units — confirm against PA docs. */
    rs->offset_scale = state->offset_scale * 16.0f;
    rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
    rs->offset_units_unscaled = state->offset_units_unscaled;

    if (state->point_size_per_vertex) {
        psize_min = util_get_min_point_size(state);
        psize_max = 8192;
    } else {
        /* Force the point size to be as if the vertex output was disabled. */
        psize_min = state->point_size;
        psize_max = state->point_size;
    }

    spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
    if (state->sprite_coord_enable) {
        /* Override sprite texcoords: X=0, Y=1 ("s","t"), Z=0, W=1. */
        spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
            S_0286D4_PNT_SPRITE_OVRD_X(2) |
            S_0286D4_PNT_SPRITE_OVRD_Y(3) |
            S_0286D4_PNT_SPRITE_OVRD_Z(0) |
            S_0286D4_PNT_SPRITE_OVRD_W(1);
        if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
            spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
        }
    }

    r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3);
    /* point size 12.4 fixed point (divide by two, because 0.5 = 1 pixel) */
    tmp = r600_pack_float_12p4(state->point_size/2);
    r600_store_value(&rs->buffer, /* R_028A00_PA_SU_POINT_SIZE */
             S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
    r600_store_value(&rs->buffer, /* R_028A04_PA_SU_POINT_MINMAX */
             S_028A04_MIN_SIZE(r600_pack_float_12p4(psize_min/2)) |
             S_028A04_MAX_SIZE(r600_pack_float_12p4(psize_max/2)));
    r600_store_value(&rs->buffer, /* R_028A08_PA_SU_LINE_CNTL */
             S_028A08_WIDTH((unsigned)(state->line_width * 8)));

    r600_store_context_reg(&rs->buffer, R_0286D4_SPI_INTERP_CONTROL_0, spi_interp);
    r600_store_context_reg(&rs->buffer, R_028A48_PA_SC_MODE_CNTL_0,
                   S_028A48_MSAA_ENABLE(state->multisample) |
                   S_028A48_VPORT_SCISSOR_ENABLE(1) |
                   S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable));

    /* Cayman moved PA_SU_VTX_CNTL; the payload is identical. */
    if (rctx->b.chip_class == CAYMAN) {
        r600_store_context_reg(&rs->buffer, CM_R_028BE4_PA_SU_VTX_CNTL,
                       S_028C08_PIX_CENTER_HALF(state->half_pixel_center) |
                       S_028C08_QUANT_MODE(V_028C08_X_1_256TH));
    } else {
        r600_store_context_reg(&rs->buffer, R_028C08_PA_SU_VTX_CNTL,
                       S_028C08_PIX_CENTER_HALF(state->half_pixel_center) |
                       S_028C08_QUANT_MODE(V_028C08_X_1_256TH));
    }

    r600_store_context_reg(&rs->buffer, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
    r600_store_context_reg(&rs->buffer, R_028814_PA_SU_SC_MODE_CNTL,
                   S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
                   S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
                   S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
                   S_028814_FACE(!state->front_ccw) |
                   S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
                   S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
                   S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
                   S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
                              state->fill_back != PIPE_POLYGON_MODE_FILL) |
                   S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
                   S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)));
    return rs;
}

/* pipe_context::create_sampler_state.
 * Packs wrap modes, filtering, anisotropy and LOD range into the three
 * SQ_TEX_SAMPLER words. */
static void *evergreen_create_sampler_state(struct pipe_context *ctx,
                    const struct pipe_sampler_state *state)
{
    struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
    struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state);
    /* A non-negative force_aniso debug override takes priority over the
     * state's own max_anisotropy. */
    unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
                             : state->max_anisotropy;
    unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
    float max_lod = state->max_lod;

    if (!ss) {
        return NULL;
    }

    /* If the min_mip_filter is NONE, then the texture has no mipmapping and
     * MIP_FILTER will also be set to NONE. However, if more then one LOD is
     * configured, then the texture lookup seems to fail for some specific texture
     * formats. Forcing the number of LODs to one in this case fixes it. */
    if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
        max_lod = state->min_lod;

    ss->border_color_use = sampler_state_needs_border_color(state);

    /* R_03C000_SQ_TEX_SAMPLER_WORD0_0 */
    ss->tex_sampler_words[0] =
        S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
        S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
        S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
        S_03C000_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
        S_03C000_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
        S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
        S_03C000_MAX_ANISO_RATIO(max_aniso_ratio) |
        S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
        S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
    /* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */
    ss->tex_sampler_words[1] =
        S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
        S_03C004_MAX_LOD(S_FIXED(CLAMP(max_lod, 0, 15), 8));
    /* R_03C008_SQ_TEX_SAMPLER_WORD2_0 */
    ss->tex_sampler_words[2] =
        S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
        (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
        S_03C008_TYPE(1);

    if (ss->border_color_use) {
        memcpy(&ss->border_color, &state->border_color, sizeof(state->border_color));
    }
    return ss;
}

/* Parameters for building the 8 texture-resource words describing a
 * buffer resource (texture-buffer / vertex-fetch style view). */
struct eg_buf_res_params {
    enum pipe_format pipe_format;  /* element format of the view */
    unsigned offset;               /* byte offset into the buffer */
    unsigned size;                 /* view size in bytes */
    unsigned char swizzle[4];      /* per-channel swizzle */
    bool uncached;                 /* set the UNCACHED bit in word 3 */
    bool force_swizzle;            /* use `swizzle` verbatim instead of
                                    * combining with the format swizzle */
    bool size_in_bytes;            /* word 4 holds bytes, not elements */
};

/* Fill the 8 SQ_TEX resource words for a buffer view.
 * Buffer addresses need no mip relocation, so *skip_mip_address_reloc
 * is always set to true. */
static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
                         struct pipe_resource *buffer,
                         struct eg_buf_res_params *params,
                         bool *skip_mip_address_reloc,
                         unsigned tex_resource_words[8])
{
    struct r600_texture *tmp = (struct r600_texture*)buffer;
    uint64_t va;
    int stride = util_format_get_blocksize(params->pipe_format);
    unsigned format, num_format, format_comp, endian;
    unsigned swizzle_res;
    const struct util_format_description *desc;

    r600_vertex_data_type(params->pipe_format,
                  &format, &num_format, &format_comp,
                  &endian);

    desc = util_format_description(params->pipe_format);

    if (params->force_swizzle)
        swizzle_res = r600_get_swizzle_combined(params->swizzle, NULL, TRUE);
    else
        swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE);

    va = tmp->resource.gpu_address + params->offset;
    *skip_mip_address_reloc = true;
    tex_resource_words[0] = va;
    tex_resource_words[1] = params->size - 1;
    tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) |
        S_030008_STRIDE(stride) |
        S_030008_DATA_FORMAT(format) |
        S_030008_NUM_FORMAT_ALL(num_format) |
        S_030008_FORMAT_COMP_ALL(format_comp) |
        S_030008_ENDIAN_SWAP(endian);
    tex_resource_words[3] = swizzle_res | S_03000C_UNCACHED(params->uncached);
    /*
     * dword 4 is for number of elements, for use with resinfo,
     * albeit the amd gpu shader analyser
     * uses a const buffer to store the element sizes for buffer txq
     */
    tex_resource_words[4] = params->size_in_bytes ? params->size : (params->size / stride);

    tex_resource_words[5] = tex_resource_words[6] = 0;
    tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
}

/* Finish creating a sampler view of a buffer resource: translate the
 * view state into eg_buf_res_params, fill the resource words, and track
 * the view on the context's texture-buffer list. */
static struct pipe_sampler_view *
texture_buffer_sampler_view(struct r600_context *rctx,
                struct r600_pipe_sampler_view *view,
                unsigned width0, unsigned height0)
{
    struct r600_texture *tmp = (struct r600_texture*)view->base.texture;
    struct eg_buf_res_params params;

    memset(&params, 0, sizeof(params));

    params.pipe_format = view->base.format;
    params.offset = view->base.u.buf.offset;
    params.size = view->base.u.buf.size;
    params.swizzle[0] = view->base.swizzle_r;
    params.swizzle[1] = view->base.swizzle_g;
    params.swizzle[2] = view->base.swizzle_b;
    params.swizzle[3] = view->base.swizzle_a;

    evergreen_fill_buffer_resource_words(rctx, view->base.texture,
                         &params, &view->skip_mip_address_reloc,
                         view->tex_resource_words);
    view->tex_resource = &tmp->resource;

    if (tmp->resource.gpu_address)
        LIST_ADDTAIL(&view->list, &rctx->texture_buffers);
    return &view->base;
}

/* Parameters for building the texture-resource words of an image
 * (non-buffer) view. */
struct eg_tex_res_params {
    enum pipe_format pipe_format;  /* view format */
    int force_level;               /* if non-zero, view exactly this mip */
    unsigned width0;               /* base-level width */
    unsigned height0;              /* base-level height */
    unsigned first_level;          /* first mip level of the view */
    unsigned last_level;           /* last mip level of the view */
    unsigned first_layer;          /* first array layer / cube face */
    unsigned last_layer;           /* last array layer / cube face */
    unsigned target;               /* pipe_texture_target of the view */
    unsigned char swizzle[4];      /* per-channel swizzle */
};

/* Fill the eight SQ_TEX_RESOURCE words for a (non-buffer) texture view.
 *
 * On success writes tex_resource_words[0..7] and *skip_mip_address_reloc
 * (true when word 3 must not get a relocation, i.e. the disabled-FMASK
 * case below), and returns 0.  Returns -1 if the format cannot be
 * translated to a hardware texture format.
 */
static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
					     struct pipe_resource *texture,
					     struct eg_tex_res_params *params,
					     bool *skip_mip_address_reloc,
					     unsigned tex_resource_words[8])
{
	struct r600_screen *rscreen = (struct r600_screen*)rctx->b.b.screen;
	struct r600_texture *tmp = (struct r600_texture*)texture;
	unsigned format, endian;
	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
	unsigned char array_mode = 0, non_disp_tiling = 0;
	unsigned height, depth, width;
	unsigned macro_aspect, tile_split, bankh, bankw, nbanks, fmask_bankh;
	struct legacy_surf_level *surflevel;
	unsigned base_level, first_level, last_level;
	unsigned dim, last_layer;
	uint64_t va;
	bool do_endian_swap = FALSE;

	tile_split = tmp->surface.u.legacy.tile_split;
	surflevel = tmp->surface.u.legacy.level;

	/* Texturing with separate depth and stencil. */
	if (tmp->db_compatible) {
		switch (params->pipe_format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			params->pipe_format = PIPE_FORMAT_Z32_FLOAT;
			break;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			/* Z24 is always stored like this for DB
			 * compatibility.
			 */
			params->pipe_format = PIPE_FORMAT_Z24X8_UNORM;
			break;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
			/* Stencil-only view: switch to the stencil miptree. */
			params->pipe_format = PIPE_FORMAT_S8_UINT;
			tile_split = tmp->surface.u.legacy.stencil_tile_split;
			surflevel = tmp->surface.u.legacy.stencil_level;
			break;
		default:;
		}
	}

	if (R600_BIG_ENDIAN)
		do_endian_swap = !tmp->db_compatible;

	format = r600_translate_texformat(rctx->b.b.screen, params->pipe_format,
					  params->swizzle,
					  &word4, &yuv_format, do_endian_swap);
	assert(format != ~0);
	if (format == ~0) {
		return -1;
	}

	endian = r600_colorformat_endian_swap(format, do_endian_swap);

	base_level = 0;
	first_level = params->first_level;
	last_level = params->last_level;
	width = params->width0;
	height = params->height0;
	depth = texture->depth0;

	/* A forced level becomes the base level of a single-mip view. */
	if (params->force_level) {
		base_level = params->force_level;
		first_level = 0;
		last_level = 0;
		width = u_minify(width, params->force_level);
		height = u_minify(height, params->force_level);
		depth = u_minify(depth, params->force_level);
	}

	/* Pitch in texels: blocks-per-row times block width. */
	pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(params->pipe_format);
	non_disp_tiling = tmp->non_disp_tiling;

	switch (surflevel[base_level].mode) {
	default:
	case RADEON_SURF_MODE_LINEAR_ALIGNED:
		array_mode = V_028C70_ARRAY_LINEAR_ALIGNED;
		break;
	case RADEON_SURF_MODE_2D:
		array_mode = V_028C70_ARRAY_2D_TILED_THIN1;
		break;
	case RADEON_SURF_MODE_1D:
		array_mode = V_028C70_ARRAY_1D_TILED_THIN1;
		break;
	}
	macro_aspect = tmp->surface.u.legacy.mtilea;
	bankw = tmp->surface.u.legacy.bankw;
	bankh = tmp->surface.u.legacy.bankh;
	/* Encode raw tiling parameters into their register field values. */
	tile_split = eg_tile_split(tile_split);
	macro_aspect = eg_macro_tile_aspect(macro_aspect);
	bankw = eg_bank_wh(bankw);
	bankh = eg_bank_wh(bankh);
	fmask_bankh = eg_bank_wh(tmp->fmask.bank_height);

	/* 128 bit formats require tile type = 1 */
	if (rscreen->b.chip_class == CAYMAN) {
		if (util_format_get_blocksize(params->pipe_format) >= 16)
			non_disp_tiling = 1;
	}
	nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);


	va = tmp->resource.gpu_address;

	/* array type views and views into array types need to use layer offset */
	dim = r600_tex_dim(tmp, params->target, texture->nr_samples);

	if (dim == V_030000_SQ_TEX_DIM_1D_ARRAY) {
		height = 1;
		depth = texture->array_size;
	} else if (dim == V_030000_SQ_TEX_DIM_2D_ARRAY ||
		   dim == V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA) {
		depth = texture->array_size;
	} else if (dim == V_030000_SQ_TEX_DIM_CUBEMAP)
		depth = texture->array_size / 6;

	tex_resource_words[0] = (S_030000_DIM(dim) |
				 S_030000_PITCH((pitch / 8) - 1) |
				 S_030000_TEX_WIDTH(width - 1));
	if (rscreen->b.chip_class == CAYMAN)
		tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
	else
		tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
	tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
				 S_030004_TEX_DEPTH(depth - 1) |
				 S_030004_ARRAY_MODE(array_mode));
	tex_resource_words[2] = (surflevel[base_level].offset + va) >> 8;

	*skip_mip_address_reloc = false;
	/* TEX_RESOURCE_WORD3.MIP_ADDRESS */
	if (texture->nr_samples > 1 && rscreen->has_compressed_msaa_texturing) {
		if (tmp->is_depth) {
			/* disable FMASK (0 = disabled) */
			tex_resource_words[3] = 0;
			*skip_mip_address_reloc = true;
		} else {
			/* FMASK should be in MIP_ADDRESS for multisample textures */
			tex_resource_words[3] = (tmp->fmask.offset + va) >> 8;
		}
	} else if (last_level && texture->nr_samples <= 1) {
		tex_resource_words[3] = (surflevel[1].offset + va) >> 8;
	} else {
		tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8;
	}

	last_layer = params->last_layer;
	/* Non-array view of an array resource: restrict to a single layer. */
	if (params->target != texture->target && depth == 1) {
		last_layer = params->first_layer;
	}
	tex_resource_words[4] = (word4 |
				 S_030010_ENDIAN_SWAP(endian));
	tex_resource_words[5] = S_030014_BASE_ARRAY(params->first_layer) |
				S_030014_LAST_ARRAY(last_layer);
	tex_resource_words[6] = S_030018_TILE_SPLIT(tile_split);

	if (texture->nr_samples > 1) {
		unsigned log_samples = util_logbase2(texture->nr_samples);
		if (rscreen->b.chip_class == CAYMAN) {
			tex_resource_words[4] |= S_030010_LOG2_NUM_FRAGMENTS(log_samples);
		}
		/* LAST_LEVEL holds log2(nr_samples) for multisample textures */
		tex_resource_words[5] |= S_030014_LAST_LEVEL(log_samples);
		tex_resource_words[6] |= S_030018_FMASK_BANK_HEIGHT(fmask_bankh);
	} else {
		bool no_mip = first_level == last_level;

		tex_resource_words[4] |= S_030010_BASE_LEVEL(first_level);
		tex_resource_words[5] |= S_030014_LAST_LEVEL(last_level);
		/* aniso max 16 samples */
		tex_resource_words[6] |= S_030018_MAX_ANISO_RATIO(no_mip ? 0 : 4);
	}

	tex_resource_words[7] = S_03001C_DATA_FORMAT(format) |
				S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE) |
				S_03001C_BANK_WIDTH(bankw) |
				S_03001C_BANK_HEIGHT(bankh) |
				S_03001C_MACRO_TILE_ASPECT(macro_aspect) |
				S_03001C_NUM_BANKS(nbanks) |
				S_03001C_DEPTH_SAMPLE_ORDER(tmp->db_compatible);
	return 0;
}

/* Create a sampler view with explicit base dimensions and an optional
 * forced mip level.  Buffer resources are delegated to
 * texture_buffer_sampler_view(); textures go through
 * evergreen_fill_tex_resource_words().  Returns NULL on allocation or
 * format-translation failure.
 */
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
				     struct pipe_resource *texture,
				     const struct pipe_sampler_view *state,
				     unsigned width0, unsigned height0,
				     unsigned force_level)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
	struct r600_texture *tmp = (struct r600_texture*)texture;
	struct eg_tex_res_params params;
	int ret;

	if (!view)
		return NULL;

	/* initialize base object */
	view->base = *state;
	view->base.texture = NULL;
	pipe_reference(NULL, &texture->reference);
	view->base.texture = texture;
	view->base.reference.count = 1;
	view->base.context = ctx;

	if (state->target == PIPE_BUFFER)
		return texture_buffer_sampler_view(rctx, view, width0, height0);

	memset(&params, 0, sizeof(params));
	params.pipe_format = state->format;
	params.force_level = force_level;
	params.width0 = width0;
	params.height0 = height0;
	params.first_level = state->u.tex.first_level;
	params.last_level = state->u.tex.last_level;
	params.first_layer = state->u.tex.first_layer;
	params.last_layer = state->u.tex.last_layer;
	params.target = state->target;
	params.swizzle[0] = state->swizzle_r;
	params.swizzle[1] = state->swizzle_g;
	params.swizzle[2] = state->swizzle_b;
	params.swizzle[3] = state->swizzle_a;

	ret = evergreen_fill_tex_resource_words(rctx, texture, &params,
						&view->skip_mip_address_reloc,
						view->tex_resource_words);
	if (ret != 0) {
		FREE(view);
		return NULL;
	}

	/* Remember stencil-only views; they need special decompress handling. */
	if (state->format == PIPE_FORMAT_X24S8_UINT ||
	    state->format == PIPE_FORMAT_S8X24_UINT ||
	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
	    state->format == PIPE_FORMAT_S8_UINT)
		view->is_stencil_sampler = true;

	view->tex_resource = &tmp->resource;

	return &view->base;
}

/* Standard sampler-view creation: full mip/layer range of the resource. */
static struct pipe_sampler_view *
evergreen_create_sampler_view(struct pipe_context *ctx,
			      struct pipe_resource *tex,
			      const struct pipe_sampler_view *state)
{
	return evergreen_create_sampler_view_custom(ctx, tex, state,
						    tex->width0, tex->height0, 0);
}

/* Emit the SQ GPR resource-management config registers.  With dynamic
 * GPR allocation only the clause-temp count is programmed; otherwise the
 * three precomputed static splits are emitted.
 */
static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	struct r600_config_state *a = (struct r600_config_state*)atom;

	radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
	if (a->dyn_gpr_enabled) {
		radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
		radeon_emit(cs, 0);
		radeon_emit(cs, 0);
	} else {
		radeon_emit(cs, a->sq_gpr_resource_mgmt_1);
		radeon_emit(cs, a->sq_gpr_resource_mgmt_2);
		radeon_emit(cs, a->sq_gpr_resource_mgmt_3);
	}
	radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8));
	if (a->dyn_gpr_enabled) {
		radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
				       S_028838_PS_GPRS(0x1e) |
				       S_028838_VS_GPRS(0x1e) |
				       S_028838_GS_GPRS(0x1e) |
				       S_028838_ES_GPRS(0x1e) |
				       S_028838_HS_GPRS(0x1e) |
				       S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
	}
}

/* Emit all six user clip planes (6 planes x 4 dwords) in one burst. */
static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	struct pipe_clip_state *state = &rctx->clip_state.state;

	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
	radeon_emit_array(cs, (unsigned*)state, 6*4);
}

/* Polygon stipple is not implemented on this path; intentional no-op. */
static void evergreen_set_polygon_stipple(struct pipe_context *ctx,
					  const struct pipe_poly_stipple *state)
{
}

/* Pack a scissor rectangle into the TL/BR register encodings, applying
 * the hardware scissor bug workaround first.
 */
static void evergreen_get_scissor_rect(struct r600_context *rctx,
				       unsigned tl_x, unsigned tl_y, unsigned br_x, unsigned br_y,
				       uint32_t *tl, uint32_t *br)
{
	struct pipe_scissor_state scissor = {tl_x, tl_y, br_x, br_y};

	evergreen_apply_scissor_bug_workaround(&rctx->b, &scissor);

	*tl = S_028240_TL_X(scissor.minx) | S_028240_TL_Y(scissor.miny);
	*br = S_028244_BR_X(scissor.maxx) | S_028244_BR_Y(scissor.maxy);
}

/* Precomputed CB register field values for one color surface, produced
 * by evergreen_set_color_surface_buffer()/_common() and copied into
 * r600_surface by the init helpers below.
 */
struct r600_tex_color_info {
	unsigned info;          /* CB_COLOR*_INFO */
	unsigned view;          /* CB_COLOR*_VIEW */
	unsigned dim;           /* CB_COLOR*_DIM */
	unsigned pitch;         /* CB_COLOR*_PITCH */
	unsigned slice;         /* CB_COLOR*_SLICE */
	unsigned attrib;        /* CB_COLOR*_ATTRIB */
	unsigned ntype;         /* NUMBER_TYPE field value */
	unsigned fmask;         /* FMASK base (256B units) */
	unsigned fmask_slice;   /* CB_COLOR*_FMASK_SLICE */
	uint64_t offset;        /* surface base address (256B units) */
	boolean export_16bpc;   /* EXPORT_4C_16BPC is usable */
};

/* Compute CB register values for rendering to a linear buffer range
 * [first_element, last_element] viewed with the given format.
 */
static void evergreen_set_color_surface_buffer(struct r600_context *rctx,
					       struct r600_resource *res,
					       enum pipe_format pformat,
					       unsigned first_element,
					       unsigned last_element,
					       struct r600_tex_color_info *color)
{
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	unsigned block_size = util_format_get_blocksize(res->b.b.format);
	unsigned pitch_alignment =
		MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
	unsigned pitch = align(res->b.b.width0, pitch_alignment);
	int i;
	unsigned width_elements;

	width_elements = last_element - first_element + 1;

	format = r600_translate_colorformat(rctx->b.chip_class, pformat, FALSE);
	swap = r600_translate_colorswap(pformat, FALSE);

	endian = r600_colorformat_endian_swap(format, FALSE);

	desc = util_format_description(pformat);
	/* Find the first non-void channel; it determines NUMBER_TYPE. */
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	ntype = V_028C70_NUMBER_UNORM;
	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
		ntype = V_028C70_NUMBER_SRGB;
	else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
		if (desc->channel[i].normalized)
			ntype = V_028C70_NUMBER_SNORM;
		else if (desc->channel[i].pure_integer)
			ntype = V_028C70_NUMBER_SINT;
	} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
		if (desc->channel[i].normalized)
			ntype = V_028C70_NUMBER_UNORM;
		else if (desc->channel[i].pure_integer)
			ntype = V_028C70_NUMBER_UINT;
	} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	}

	/* PITCH_TILE_MAX is in units of 8 elements, minus one. */
	pitch = (pitch / 8) - 1;
	color->pitch = S_028C64_PITCH_TILE_MAX(pitch);

	color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
	color->info |= S_028C70_FORMAT(format) |
		       S_028C70_COMP_SWAP(swap) |
		       S_028C70_BLEND_CLAMP(0) |
		       S_028C70_BLEND_BYPASS(1) |
		       S_028C70_NUMBER_TYPE(ntype) |
		       S_028C70_ENDIAN(endian);
	color->attrib = S_028C74_NON_DISP_TILING_ORDER(1);
	color->ntype = ntype;
	color->export_16bpc = false;
	color->dim = width_elements - 1;
	color->slice = 0; /* (width_elements / 64) - 1;*/
	color->view = 0;
	color->offset = (res->gpu_address + first_element) >> 8;

	/* No FMASK for buffers; point it at the color base (disabled). */
	color->fmask = color->offset;
	color->fmask_slice = 0;
}

/* Compute CB register values for one mip level / layer range of a color
 * texture: address, tiling attributes, number type, blend clamp/bypass,
 * FMASK and the EXPORT_NORM fast-export eligibility.
 */
static void evergreen_set_color_surface_common(struct r600_context *rctx,
					       struct r600_texture *rtex,
					       unsigned level,
					       unsigned first_layer,
					       unsigned last_layer,
					       enum pipe_format pformat,
					       struct r600_tex_color_info *color)
{
	struct r600_screen *rscreen = rctx->screen;
	unsigned pitch, slice;
	unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE;
	int i;

	color->offset = rtex->surface.u.legacy.level[level].offset;
	color->view = S_028C6C_SLICE_START(first_layer) |
		      S_028C6C_SLICE_MAX(last_layer);

	/* Full GPU address of the level, in 256-byte units. */
	color->offset += rtex->resource.gpu_address;
	color->offset >>= 8;

	color->dim = 0;
	pitch = (rtex->surface.u.legacy.level[level].nblk_x) / 8 - 1;
	slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64;
	if (slice) {
		slice = slice - 1;
	}

	color->info = 0;
	switch (rtex->surface.u.legacy.level[level].mode) {
	default:
	case RADEON_SURF_MODE_LINEAR_ALIGNED:
		color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
		non_disp_tiling = 1;
		break;
	case RADEON_SURF_MODE_1D:
		color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_1D_TILED_THIN1);
		non_disp_tiling = rtex->non_disp_tiling;
		break;
	case RADEON_SURF_MODE_2D:
		color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_2D_TILED_THIN1);
		non_disp_tiling = rtex->non_disp_tiling;
		break;
	}
	tile_split = rtex->surface.u.legacy.tile_split;
	macro_aspect = rtex->surface.u.legacy.mtilea;
	bankw = rtex->surface.u.legacy.bankw;
	bankh = rtex->surface.u.legacy.bankh;
	if (rtex->fmask.size)
		fmask_bankh = rtex->fmask.bank_height;
	else
		fmask_bankh = rtex->surface.u.legacy.bankh;
	/* Encode raw tiling parameters into their register field values. */
	tile_split = eg_tile_split(tile_split);
	macro_aspect = eg_macro_tile_aspect(macro_aspect);
	bankw = eg_bank_wh(bankw);
	bankh = eg_bank_wh(bankh);
	fmask_bankh = eg_bank_wh(fmask_bankh);

	/* 128-bit formats require tile type = 1 on Cayman. */
	if (rscreen->b.chip_class == CAYMAN) {
		if (util_format_get_blocksize(pformat) >= 16)
			non_disp_tiling = 1;
	}
	nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
	desc = util_format_description(pformat);
	/* Find the first non-void channel; it determines NUMBER_TYPE. */
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	color->attrib = S_028C74_TILE_SPLIT(tile_split)|
			S_028C74_NUM_BANKS(nbanks) |
			S_028C74_BANK_WIDTH(bankw) |
			S_028C74_BANK_HEIGHT(bankh) |
			S_028C74_MACRO_TILE_ASPECT(macro_aspect) |
			S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) |
			S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);

	if (rctx->b.chip_class == CAYMAN) {
		color->attrib |= S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] ==
							    PIPE_SWIZZLE_1);

		if (rtex->resource.b.b.nr_samples > 1) {
			unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
			color->attrib |= S_028C74_NUM_SAMPLES(log_samples) |
					 S_028C74_NUM_FRAGMENTS(log_samples);
		}
	}

	ntype = V_028C70_NUMBER_UNORM;
	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
		ntype = V_028C70_NUMBER_SRGB;
	else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
		if (desc->channel[i].normalized)
			ntype = V_028C70_NUMBER_SNORM;
		else if (desc->channel[i].pure_integer)
			ntype = V_028C70_NUMBER_SINT;
	} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
		if (desc->channel[i].normalized)
			ntype = V_028C70_NUMBER_UNORM;
		else if (desc->channel[i].pure_integer)
			ntype = V_028C70_NUMBER_UINT;
	} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	}

	if (R600_BIG_ENDIAN)
		do_endian_swap = !rtex->db_compatible;

	format = r600_translate_colorformat(rctx->b.chip_class, pformat, do_endian_swap);
	assert(format != ~0);
	swap = r600_translate_colorswap(pformat, do_endian_swap);
	assert(swap != ~0);

	endian = r600_colorformat_endian_swap(format, do_endian_swap);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	color->ntype = ntype;
	color->info |= S_028C70_FORMAT(format) |
		       S_028C70_COMP_SWAP(swap) |
		       S_028C70_BLEND_CLAMP(blend_clamp) |
		       S_028C70_BLEND_BYPASS(blend_bypass) |
		       S_028C70_SIMPLE_FLOAT(1) |
		       S_028C70_NUMBER_TYPE(ntype) |
		       S_028C70_ENDIAN(endian);

	if (rtex->fmask.size) {
		color->info |= S_028C70_COMPRESSION(1);
	}

	/* EXPORT_NORM is an optimization that can be enabled for better
	 * performance in certain cases.
	 * EXPORT_NORM can be enabled if:
	 * - 11-bit or smaller UNORM/SNORM/SRGB
	 * - 16-bit or smaller FLOAT
	 */
	color->export_16bpc = false;
	if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
	    ((desc->channel[i].size < 12 &&
	      desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
	      ntype != V_028C70_NUMBER_UINT && ntype != V_028C70_NUMBER_SINT) ||
	     (desc->channel[i].size < 17 &&
	      desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))) {
		color->info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC);
		color->export_16bpc = true;
	}

	color->pitch = S_028C64_PITCH_TILE_MAX(pitch);
	color->slice = S_028C68_SLICE_TILE_MAX(slice);

	if (rtex->fmask.size) {
		color->fmask = (rtex->resource.gpu_address + rtex->fmask.offset) >> 8;
		color->fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
	} else {
		/* No FMASK: point it at the color base (effectively unused). */
		color->fmask = color->offset;
		color->fmask_slice = S_028C88_TILE_MAX(slice);
	}
}

/**
 * This function initializes the CB* register values for RATs. It is meant
 * to be used for 1D aligned buffers that do not have an associated
 * radeon_surf.
 */
void evergreen_init_color_surface_rat(struct r600_context *rctx,
				      struct r600_surface *surf)
{
	struct pipe_resource *pipe_buffer = surf->base.texture;
	struct r600_tex_color_info color;

	evergreen_set_color_surface_buffer(rctx, (struct r600_resource *)surf->base.texture,
					   surf->base.format, 0, pipe_buffer->width0,
					   &color);

	surf->cb_color_base = color.offset;
	surf->cb_color_dim = color.dim;
	surf->cb_color_info = color.info | S_028C70_RAT(1);
	surf->cb_color_pitch = color.pitch;
	surf->cb_color_slice = color.slice;
	surf->cb_color_view = color.view;
	surf->cb_color_attrib = color.attrib;
	surf->cb_color_fmask = color.fmask;
	surf->cb_color_fmask_slice = color.fmask_slice;

	/* NOTE(review): overrides the cb_color_view assignment above;
	 * color.view is 0 for buffers anyway, so this is redundant. */
	surf->cb_color_view = 0;

	/* Set the buffer range the GPU will have access to: */
	util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range,
		       0, pipe_buffer->width0);
}


/* Fill the CB* fields of a color surface from the shared helper and
 * derive alphatest_bypass / export_16bpc flags.
 */
void evergreen_init_color_surface(struct r600_context *rctx,
				  struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct r600_tex_color_info color;

	evergreen_set_color_surface_common(rctx, rtex, level,
					   surf->base.u.tex.first_layer,
					   surf->base.u.tex.last_layer,
					   surf->base.format,
					   &color);

	/* Pure-integer targets cannot use the alpha-test fast path. */
	surf->alphatest_bypass = color.ntype == V_028C70_NUMBER_UINT ||
				 color.ntype == V_028C70_NUMBER_SINT;
	surf->export_16bpc = color.export_16bpc;

	/* XXX handle enabling of CB beyond BASE8 which has different offset */
	surf->cb_color_base = color.offset;
	surf->cb_color_dim = color.dim;
	surf->cb_color_info = color.info;
	surf->cb_color_pitch = color.pitch;
	surf->cb_color_slice = color.slice;
	surf->cb_color_view = color.view;
	surf->cb_color_attrib = color.attrib;
	surf->cb_color_fmask = color.fmask;
	surf->cb_color_fmask_slice = color.fmask_slice;

	surf->color_initialized = true;
}

/* Fill the DB* (depth/stencil) register fields of a surface for one mip
 * level, including stencil base/info and HTILE state when enabled.
 */
static void evergreen_init_depth_surface(struct r600_context *rctx,
					 struct r600_surface *surf)
{
	struct r600_screen *rscreen = rctx->screen;
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct legacy_surf_level *levelinfo = &rtex->surface.u.legacy.level[level];
	uint64_t offset;
	unsigned format, array_mode;
	unsigned macro_aspect, tile_split, bankh, bankw, nbanks;


	format = r600_translate_dbformat(surf->base.format);
	assert(format != ~0);

	offset = rtex->resource.gpu_address;
	offset += rtex->surface.u.legacy.level[level].offset;

	/* Depth buffers are always tiled; linear falls back to 1D encoding. */
	switch (rtex->surface.u.legacy.level[level].mode) {
	case RADEON_SURF_MODE_2D:
		array_mode = V_028C70_ARRAY_2D_TILED_THIN1;
		break;
	case RADEON_SURF_MODE_1D:
	case RADEON_SURF_MODE_LINEAR_ALIGNED:
	default:
		array_mode = V_028C70_ARRAY_1D_TILED_THIN1;
		break;
	}
	tile_split = rtex->surface.u.legacy.tile_split;
	macro_aspect = rtex->surface.u.legacy.mtilea;
	bankw = rtex->surface.u.legacy.bankw;
	bankh = rtex->surface.u.legacy.bankh;
	/* Encode raw tiling parameters into their register field values. */
	tile_split = eg_tile_split(tile_split);
	macro_aspect = eg_macro_tile_aspect(macro_aspect);
	bankw = eg_bank_wh(bankw);
	bankh = eg_bank_wh(bankh);
	nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
	offset >>= 8;

	surf->db_z_info = S_028040_ARRAY_MODE(array_mode) |
			  S_028040_FORMAT(format) |
			  S_028040_TILE_SPLIT(tile_split)|
			  S_028040_NUM_BANKS(nbanks) |
			  S_028040_BANK_WIDTH(bankw) |
			  S_028040_BANK_HEIGHT(bankh) |
			  S_028040_MACRO_TILE_ASPECT(macro_aspect);
	if (rscreen->b.chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) {
		surf->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
	}

	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_base = offset;
	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_depth_size = S_028058_PITCH_TILE_MAX(levelinfo->nblk_x / 8 - 1) |
			      S_028058_HEIGHT_TILE_MAX(levelinfo->nblk_y / 8 - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(levelinfo->nblk_x *
						       levelinfo->nblk_y / 64 - 1);

	if (rtex->surface.has_stencil) {
1408848b8605Smrg uint64_t stencil_offset; 1409b8e80941Smrg unsigned stile_split = rtex->surface.u.legacy.stencil_tile_split; 1410848b8605Smrg 1411848b8605Smrg stile_split = eg_tile_split(stile_split); 1412848b8605Smrg 1413b8e80941Smrg stencil_offset = rtex->surface.u.legacy.stencil_level[level].offset; 1414848b8605Smrg stencil_offset += rtex->resource.gpu_address; 1415848b8605Smrg 1416848b8605Smrg surf->db_stencil_base = stencil_offset >> 8; 1417848b8605Smrg surf->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8) | 1418848b8605Smrg S_028044_TILE_SPLIT(stile_split); 1419848b8605Smrg } else { 1420848b8605Smrg surf->db_stencil_base = offset; 1421848b8605Smrg /* DRM 2.6.18 allows the INVALID format to disable stencil. 1422848b8605Smrg * Older kernels are out of luck. */ 1423848b8605Smrg surf->db_stencil_info = rctx->screen->b.info.drm_minor >= 18 ? 1424848b8605Smrg S_028044_FORMAT(V_028044_STENCIL_INVALID) : 1425848b8605Smrg S_028044_FORMAT(V_028044_STENCIL_8); 1426848b8605Smrg } 1427848b8605Smrg 1428b8e80941Smrg if (r600_htile_enabled(rtex, level)) { 1429b8e80941Smrg uint64_t va = rtex->resource.gpu_address + rtex->htile_offset; 1430848b8605Smrg surf->db_htile_data_base = va >> 8; 1431848b8605Smrg surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) | 1432b8e80941Smrg S_028ABC_HTILE_HEIGHT(1) | 1433b8e80941Smrg S_028ABC_FULL_CACHE(1); 1434848b8605Smrg surf->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1); 1435848b8605Smrg surf->db_preload_control = 0; 1436848b8605Smrg } 1437848b8605Smrg 1438848b8605Smrg surf->depth_initialized = true; 1439848b8605Smrg} 1440848b8605Smrg 1441848b8605Smrgstatic void evergreen_set_framebuffer_state(struct pipe_context *ctx, 1442848b8605Smrg const struct pipe_framebuffer_state *state) 1443848b8605Smrg{ 1444848b8605Smrg struct r600_context *rctx = (struct r600_context *)ctx; 1445848b8605Smrg struct r600_surface *surf; 1446848b8605Smrg struct r600_texture *rtex; 1447848b8605Smrg uint32_t i, log_samples; 1448b8e80941Smrg uint32_t target_mask = 
0; 1449b8e80941Smrg /* Flush TC when changing the framebuffer state, because the only 1450b8e80941Smrg * client not using TC that can change textures is the framebuffer. 1451b8e80941Smrg * Other places don't typically have to flush TC. 1452b8e80941Smrg */ 1453b8e80941Smrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | 1454b8e80941Smrg R600_CONTEXT_FLUSH_AND_INV | 1455b8e80941Smrg R600_CONTEXT_FLUSH_AND_INV_CB | 1456b8e80941Smrg R600_CONTEXT_FLUSH_AND_INV_CB_META | 1457b8e80941Smrg R600_CONTEXT_FLUSH_AND_INV_DB | 1458b8e80941Smrg R600_CONTEXT_FLUSH_AND_INV_DB_META | 1459b8e80941Smrg R600_CONTEXT_INV_TEX_CACHE; 1460848b8605Smrg 1461848b8605Smrg util_copy_framebuffer_state(&rctx->framebuffer.state, state); 1462848b8605Smrg 1463848b8605Smrg /* Colorbuffers. */ 1464848b8605Smrg rctx->framebuffer.export_16bpc = state->nr_cbufs != 0; 1465848b8605Smrg rctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 1466848b8605Smrg util_format_is_pure_integer(state->cbufs[0]->format); 1467848b8605Smrg rctx->framebuffer.compressed_cb_mask = 0; 1468848b8605Smrg rctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 1469848b8605Smrg 1470848b8605Smrg for (i = 0; i < state->nr_cbufs; i++) { 1471848b8605Smrg surf = (struct r600_surface*)state->cbufs[i]; 1472848b8605Smrg if (!surf) 1473848b8605Smrg continue; 1474848b8605Smrg 1475b8e80941Smrg target_mask |= (0xf << (i * 4)); 1476b8e80941Smrg 1477848b8605Smrg rtex = (struct r600_texture*)surf->base.texture; 1478848b8605Smrg 1479848b8605Smrg r600_context_add_resource_size(ctx, state->cbufs[i]->texture); 1480848b8605Smrg 1481848b8605Smrg if (!surf->color_initialized) { 1482848b8605Smrg evergreen_init_color_surface(rctx, surf); 1483848b8605Smrg } 1484848b8605Smrg 1485848b8605Smrg if (!surf->export_16bpc) { 1486848b8605Smrg rctx->framebuffer.export_16bpc = false; 1487848b8605Smrg } 1488848b8605Smrg 1489b8e80941Smrg if (rtex->fmask.size) { 1490848b8605Smrg rctx->framebuffer.compressed_cb_mask |= 1 << i; 
1491848b8605Smrg } 1492848b8605Smrg } 1493848b8605Smrg 1494848b8605Smrg /* Update alpha-test state dependencies. 1495848b8605Smrg * Alpha-test is done on the first colorbuffer only. */ 1496848b8605Smrg if (state->nr_cbufs) { 1497848b8605Smrg bool alphatest_bypass = false; 1498848b8605Smrg bool export_16bpc = true; 1499848b8605Smrg 1500848b8605Smrg surf = (struct r600_surface*)state->cbufs[0]; 1501848b8605Smrg if (surf) { 1502848b8605Smrg alphatest_bypass = surf->alphatest_bypass; 1503848b8605Smrg export_16bpc = surf->export_16bpc; 1504848b8605Smrg } 1505848b8605Smrg 1506848b8605Smrg if (rctx->alphatest_state.bypass != alphatest_bypass) { 1507848b8605Smrg rctx->alphatest_state.bypass = alphatest_bypass; 1508b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1509848b8605Smrg } 1510848b8605Smrg if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) { 1511848b8605Smrg rctx->alphatest_state.cb0_export_16bpc = export_16bpc; 1512b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1513848b8605Smrg } 1514848b8605Smrg } 1515848b8605Smrg 1516848b8605Smrg /* ZS buffer. 
*/ 1517848b8605Smrg if (state->zsbuf) { 1518848b8605Smrg surf = (struct r600_surface*)state->zsbuf; 1519848b8605Smrg 1520848b8605Smrg r600_context_add_resource_size(ctx, state->zsbuf->texture); 1521848b8605Smrg 1522848b8605Smrg if (!surf->depth_initialized) { 1523848b8605Smrg evergreen_init_depth_surface(rctx, surf); 1524848b8605Smrg } 1525848b8605Smrg 1526848b8605Smrg if (state->zsbuf->format != rctx->poly_offset_state.zs_format) { 1527848b8605Smrg rctx->poly_offset_state.zs_format = state->zsbuf->format; 1528b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom); 1529848b8605Smrg } 1530848b8605Smrg 1531848b8605Smrg if (rctx->db_state.rsurf != surf) { 1532848b8605Smrg rctx->db_state.rsurf = surf; 1533b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->db_state.atom); 1534b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 1535848b8605Smrg } 1536848b8605Smrg } else if (rctx->db_state.rsurf) { 1537848b8605Smrg rctx->db_state.rsurf = NULL; 1538b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->db_state.atom); 1539b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 1540848b8605Smrg } 1541848b8605Smrg 1542b8e80941Smrg if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs || 1543b8e80941Smrg rctx->cb_misc_state.bound_cbufs_target_mask != target_mask) { 1544b8e80941Smrg rctx->cb_misc_state.bound_cbufs_target_mask = target_mask; 1545848b8605Smrg rctx->cb_misc_state.nr_cbufs = state->nr_cbufs; 1546b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 1547848b8605Smrg } 1548848b8605Smrg 1549848b8605Smrg if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) { 1550848b8605Smrg rctx->alphatest_state.bypass = false; 1551b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1552848b8605Smrg } 1553848b8605Smrg 1554848b8605Smrg log_samples = util_logbase2(rctx->framebuffer.nr_samples); 1555848b8605Smrg /* This is for Cayman to program SAMPLE_RATE, and for RV770 to fix a hw bug. 
*/ 1556848b8605Smrg if ((rctx->b.chip_class == CAYMAN || 1557848b8605Smrg rctx->b.family == CHIP_RV770) && 1558848b8605Smrg rctx->db_misc_state.log_samples != log_samples) { 1559848b8605Smrg rctx->db_misc_state.log_samples = log_samples; 1560b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 1561848b8605Smrg } 1562848b8605Smrg 1563848b8605Smrg 1564848b8605Smrg /* Calculate the CS size. */ 1565848b8605Smrg rctx->framebuffer.atom.num_dw = 4; /* SCISSOR */ 1566848b8605Smrg 1567848b8605Smrg /* MSAA. */ 1568848b8605Smrg if (rctx->b.chip_class == EVERGREEN) 1569b8e80941Smrg rctx->framebuffer.atom.num_dw += 17; /* Evergreen */ 1570848b8605Smrg else 1571848b8605Smrg rctx->framebuffer.atom.num_dw += 28; /* Cayman */ 1572848b8605Smrg 1573848b8605Smrg /* Colorbuffers. */ 1574848b8605Smrg rctx->framebuffer.atom.num_dw += state->nr_cbufs * 23; 1575b8e80941Smrg rctx->framebuffer.atom.num_dw += state->nr_cbufs * 2; 1576848b8605Smrg rctx->framebuffer.atom.num_dw += (12 - state->nr_cbufs) * 3; 1577848b8605Smrg 1578848b8605Smrg /* ZS buffer. 
*/ 1579848b8605Smrg if (state->zsbuf) { 1580848b8605Smrg rctx->framebuffer.atom.num_dw += 24; 1581b8e80941Smrg rctx->framebuffer.atom.num_dw += 2; 1582848b8605Smrg } else if (rctx->screen->b.info.drm_minor >= 18) { 1583848b8605Smrg rctx->framebuffer.atom.num_dw += 4; 1584848b8605Smrg } 1585848b8605Smrg 1586b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 1587b8e80941Smrg 1588b8e80941Smrg r600_set_sample_locations_constant_buffer(rctx); 1589b8e80941Smrg rctx->framebuffer.do_update_surf_dirtiness = true; 1590848b8605Smrg} 1591848b8605Smrg 1592b8e80941Smrgstatic void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 1593b8e80941Smrg{ 1594b8e80941Smrg struct r600_context *rctx = (struct r600_context *)ctx; 1595848b8605Smrg 1596b8e80941Smrg if (rctx->ps_iter_samples == min_samples) 1597b8e80941Smrg return; 1598b8e80941Smrg 1599b8e80941Smrg rctx->ps_iter_samples = min_samples; 1600b8e80941Smrg if (rctx->framebuffer.nr_samples > 1) { 1601b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 1602b8e80941Smrg } 1603b8e80941Smrg} 1604b8e80941Smrg 1605b8e80941Smrg/* 8xMSAA */ 1606b8e80941Smrgstatic const uint32_t sample_locs_8x[] = { 1607b8e80941Smrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1608848b8605Smrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1609848b8605Smrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1610848b8605Smrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1611848b8605Smrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1612848b8605Smrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1613848b8605Smrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1614848b8605Smrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1615848b8605Smrg}; 1616848b8605Smrgstatic unsigned max_dist_8x = 7; 1617848b8605Smrg 1618848b8605Smrgstatic void evergreen_get_sample_position(struct pipe_context *ctx, 1619848b8605Smrg unsigned sample_count, 1620848b8605Smrg unsigned sample_index, 1621848b8605Smrg float *out_value) 1622848b8605Smrg{ 1623848b8605Smrg int offset, index; 1624848b8605Smrg 
struct { 1625848b8605Smrg int idx:4; 1626848b8605Smrg } val; 1627848b8605Smrg switch (sample_count) { 1628848b8605Smrg case 1: 1629848b8605Smrg default: 1630848b8605Smrg out_value[0] = out_value[1] = 0.5; 1631848b8605Smrg break; 1632848b8605Smrg case 2: 1633848b8605Smrg offset = 4 * (sample_index * 2); 1634848b8605Smrg val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf; 1635848b8605Smrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1636848b8605Smrg val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf; 1637848b8605Smrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1638848b8605Smrg break; 1639848b8605Smrg case 4: 1640848b8605Smrg offset = 4 * (sample_index * 2); 1641848b8605Smrg val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf; 1642848b8605Smrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1643848b8605Smrg val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf; 1644848b8605Smrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1645848b8605Smrg break; 1646848b8605Smrg case 8: 1647848b8605Smrg offset = 4 * (sample_index % 4 * 2); 1648848b8605Smrg index = (sample_index / 4); 1649848b8605Smrg val.idx = (sample_locs_8x[index] >> offset) & 0xf; 1650848b8605Smrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1651848b8605Smrg val.idx = (sample_locs_8x[index] >> (offset + 4)) & 0xf; 1652848b8605Smrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1653848b8605Smrg break; 1654848b8605Smrg } 1655848b8605Smrg} 1656848b8605Smrg 1657b8e80941Smrgstatic void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples) 1658848b8605Smrg{ 1659848b8605Smrg 1660b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 1661848b8605Smrg unsigned max_dist = 0; 1662848b8605Smrg 1663848b8605Smrg switch (nr_samples) { 1664848b8605Smrg default: 1665848b8605Smrg nr_samples = 0; 1666848b8605Smrg break; 1667848b8605Smrg case 2: 1668b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(eg_sample_locs_2x)); 1669b8e80941Smrg radeon_emit_array(cs, 
eg_sample_locs_2x, ARRAY_SIZE(eg_sample_locs_2x)); 1670848b8605Smrg max_dist = eg_max_dist_2x; 1671848b8605Smrg break; 1672848b8605Smrg case 4: 1673b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(eg_sample_locs_4x)); 1674b8e80941Smrg radeon_emit_array(cs, eg_sample_locs_4x, ARRAY_SIZE(eg_sample_locs_4x)); 1675848b8605Smrg max_dist = eg_max_dist_4x; 1676848b8605Smrg break; 1677848b8605Smrg case 8: 1678b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(sample_locs_8x)); 1679b8e80941Smrg radeon_emit_array(cs, sample_locs_8x, ARRAY_SIZE(sample_locs_8x)); 1680848b8605Smrg max_dist = max_dist_8x; 1681848b8605Smrg break; 1682848b8605Smrg } 1683848b8605Smrg 1684848b8605Smrg if (nr_samples > 1) { 1685b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2); 1686848b8605Smrg radeon_emit(cs, S_028C00_LAST_PIXEL(1) | 1687848b8605Smrg S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */ 1688848b8605Smrg radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) | 1689848b8605Smrg S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */ 1690b8e80941Smrg radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, 1691b8e80941Smrg EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) | 1692b8e80941Smrg EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 1693b8e80941Smrg EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1)); 1694848b8605Smrg } else { 1695b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2); 1696848b8605Smrg radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */ 1697848b8605Smrg radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */ 1698b8e80941Smrg radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, 1699b8e80941Smrg EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 1700b8e80941Smrg EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1)); 1701b8e80941Smrg } 1702b8e80941Smrg} 1703b8e80941Smrg 1704b8e80941Smrgstatic void evergreen_emit_image_state(struct r600_context *rctx, 
struct r600_atom *atom, 1705b8e80941Smrg int immed_id_base, int res_id_base, int offset, uint32_t pkt_flags) 1706b8e80941Smrg{ 1707b8e80941Smrg struct r600_image_state *state = (struct r600_image_state *)atom; 1708b8e80941Smrg struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state; 1709b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 1710b8e80941Smrg struct r600_texture *rtex; 1711b8e80941Smrg struct r600_resource *resource; 1712b8e80941Smrg int i; 1713b8e80941Smrg 1714b8e80941Smrg for (i = 0; i < R600_MAX_IMAGES; i++) { 1715b8e80941Smrg struct r600_image_view *image = &state->views[i]; 1716b8e80941Smrg unsigned reloc, immed_reloc; 1717b8e80941Smrg int idx = i + offset; 1718b8e80941Smrg 1719b8e80941Smrg if (!pkt_flags) 1720b8e80941Smrg idx += fb_state->nr_cbufs + (rctx->dual_src_blend ? 1 : 0); 1721b8e80941Smrg if (!image->base.resource) 1722b8e80941Smrg continue; 1723b8e80941Smrg 1724b8e80941Smrg resource = (struct r600_resource *)image->base.resource; 1725b8e80941Smrg if (resource->b.b.target != PIPE_BUFFER) 1726b8e80941Smrg rtex = (struct r600_texture *)image->base.resource; 1727b8e80941Smrg else 1728b8e80941Smrg rtex = NULL; 1729b8e80941Smrg 1730b8e80941Smrg reloc = radeon_add_to_buffer_list(&rctx->b, 1731b8e80941Smrg &rctx->b.gfx, 1732b8e80941Smrg resource, 1733b8e80941Smrg RADEON_USAGE_READWRITE, 1734b8e80941Smrg RADEON_PRIO_SHADER_RW_BUFFER); 1735b8e80941Smrg 1736b8e80941Smrg immed_reloc = radeon_add_to_buffer_list(&rctx->b, 1737b8e80941Smrg &rctx->b.gfx, 1738b8e80941Smrg resource->immed_buffer, 1739b8e80941Smrg RADEON_USAGE_READWRITE, 1740b8e80941Smrg RADEON_PRIO_SHADER_RW_BUFFER); 1741b8e80941Smrg 1742b8e80941Smrg if (pkt_flags) 1743b8e80941Smrg radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13); 1744b8e80941Smrg else 1745b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13); 1746b8e80941Smrg 1747b8e80941Smrg radeon_emit(cs, image->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 
1748b8e80941Smrg radeon_emit(cs, image->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 1749b8e80941Smrg radeon_emit(cs, image->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 1750b8e80941Smrg radeon_emit(cs, image->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 1751b8e80941Smrg radeon_emit(cs, image->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 1752b8e80941Smrg radeon_emit(cs, image->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 1753b8e80941Smrg radeon_emit(cs, image->cb_color_dim); /* R_028C78_CB_COLOR0_DIM */ 1754b8e80941Smrg radeon_emit(cs, rtex ? rtex->cmask.base_address_reg : image->cb_color_base); /* R_028C7C_CB_COLOR0_CMASK */ 1755b8e80941Smrg radeon_emit(cs, rtex ? rtex->cmask.slice_tile_max : 0); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 1756b8e80941Smrg radeon_emit(cs, image->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 1757b8e80941Smrg radeon_emit(cs, image->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 1758b8e80941Smrg radeon_emit(cs, rtex ? rtex->color_clear_value[0] : 0); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 1759b8e80941Smrg radeon_emit(cs, rtex ? 
rtex->color_clear_value[1] : 0); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 1760b8e80941Smrg 1761b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */ 1762b8e80941Smrg radeon_emit(cs, reloc); 1763b8e80941Smrg 1764b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */ 1765b8e80941Smrg radeon_emit(cs, reloc); 1766b8e80941Smrg 1767b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C7C_CB_COLOR0_CMASK */ 1768b8e80941Smrg radeon_emit(cs, reloc); 1769b8e80941Smrg 1770b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C84_CB_COLOR0_FMASK */ 1771b8e80941Smrg radeon_emit(cs, reloc); 1772b8e80941Smrg 1773b8e80941Smrg if (pkt_flags) 1774b8e80941Smrg radeon_compute_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8); 1775b8e80941Smrg else 1776b8e80941Smrg radeon_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8); 1777b8e80941Smrg 1778b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /**/ 1779b8e80941Smrg radeon_emit(cs, immed_reloc); 1780b8e80941Smrg 1781b8e80941Smrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); 1782b8e80941Smrg radeon_emit(cs, (immed_id_base + i + offset) * 8); 1783b8e80941Smrg radeon_emit_array(cs, image->immed_resource_words, 8); 1784b8e80941Smrg 1785b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 1786b8e80941Smrg radeon_emit(cs, immed_reloc); 1787b8e80941Smrg 1788b8e80941Smrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); 1789b8e80941Smrg radeon_emit(cs, (res_id_base + i + offset) * 8); 1790b8e80941Smrg radeon_emit_array(cs, image->resource_words, 8); 1791b8e80941Smrg 1792b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 1793b8e80941Smrg radeon_emit(cs, reloc); 1794b8e80941Smrg 1795b8e80941Smrg if (!image->skip_mip_address_reloc) { 1796b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); 1797b8e80941Smrg radeon_emit(cs, 
reloc); 1798b8e80941Smrg } 1799848b8605Smrg } 1800848b8605Smrg} 1801848b8605Smrg 1802b8e80941Smrgstatic void evergreen_emit_fragment_image_state(struct r600_context *rctx, struct r600_atom *atom) 1803b8e80941Smrg{ 1804b8e80941Smrg evergreen_emit_image_state(rctx, atom, 1805b8e80941Smrg R600_IMAGE_IMMED_RESOURCE_OFFSET, 1806b8e80941Smrg R600_IMAGE_REAL_RESOURCE_OFFSET, 0, 0); 1807b8e80941Smrg} 1808b8e80941Smrg 1809b8e80941Smrgstatic void evergreen_emit_compute_image_state(struct r600_context *rctx, struct r600_atom *atom) 1810b8e80941Smrg{ 1811b8e80941Smrg evergreen_emit_image_state(rctx, atom, 1812b8e80941Smrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET, 1813b8e80941Smrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET, 1814b8e80941Smrg 0, RADEON_CP_PACKET3_COMPUTE_MODE); 1815b8e80941Smrg} 1816b8e80941Smrg 1817b8e80941Smrgstatic void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, struct r600_atom *atom) 1818b8e80941Smrg{ 1819b8e80941Smrg int offset = util_bitcount(rctx->fragment_images.enabled_mask); 1820b8e80941Smrg evergreen_emit_image_state(rctx, atom, 1821b8e80941Smrg R600_IMAGE_IMMED_RESOURCE_OFFSET, 1822b8e80941Smrg R600_IMAGE_REAL_RESOURCE_OFFSET, offset, 0); 1823b8e80941Smrg} 1824b8e80941Smrg 1825b8e80941Smrgstatic void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struct r600_atom *atom) 1826b8e80941Smrg{ 1827b8e80941Smrg int offset = util_bitcount(rctx->compute_images.enabled_mask); 1828b8e80941Smrg evergreen_emit_image_state(rctx, atom, 1829b8e80941Smrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET, 1830b8e80941Smrg EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET, 1831b8e80941Smrg offset, RADEON_CP_PACKET3_COMPUTE_MODE); 1832b8e80941Smrg} 1833b8e80941Smrg 1834848b8605Smrgstatic void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) 1835848b8605Smrg{ 1836b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 
1837848b8605Smrg struct pipe_framebuffer_state *state = &rctx->framebuffer.state; 1838848b8605Smrg unsigned nr_cbufs = state->nr_cbufs; 1839848b8605Smrg unsigned i, tl, br; 1840848b8605Smrg struct r600_texture *tex = NULL; 1841848b8605Smrg struct r600_surface *cb = NULL; 1842848b8605Smrg 1843848b8605Smrg /* XXX support more colorbuffers once we need them */ 1844848b8605Smrg assert(nr_cbufs <= 8); 1845848b8605Smrg if (nr_cbufs > 8) 1846848b8605Smrg nr_cbufs = 8; 1847848b8605Smrg 1848848b8605Smrg /* Colorbuffers. */ 1849848b8605Smrg for (i = 0; i < nr_cbufs; i++) { 1850848b8605Smrg unsigned reloc, cmask_reloc; 1851848b8605Smrg 1852848b8605Smrg cb = (struct r600_surface*)state->cbufs[i]; 1853848b8605Smrg if (!cb) { 1854b8e80941Smrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 1855848b8605Smrg S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 1856848b8605Smrg continue; 1857848b8605Smrg } 1858848b8605Smrg 1859848b8605Smrg tex = (struct r600_texture *)cb->base.texture; 1860b8e80941Smrg reloc = radeon_add_to_buffer_list(&rctx->b, 1861b8e80941Smrg &rctx->b.gfx, 1862848b8605Smrg (struct r600_resource*)cb->base.texture, 1863848b8605Smrg RADEON_USAGE_READWRITE, 1864b8e80941Smrg tex->resource.b.b.nr_samples > 1 ? 
1865848b8605Smrg RADEON_PRIO_COLOR_BUFFER_MSAA : 1866848b8605Smrg RADEON_PRIO_COLOR_BUFFER); 1867848b8605Smrg 1868848b8605Smrg if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 1869b8e80941Smrg cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 1870848b8605Smrg tex->cmask_buffer, RADEON_USAGE_READWRITE, 1871b8e80941Smrg RADEON_PRIO_SEPARATE_META); 1872848b8605Smrg } else { 1873848b8605Smrg cmask_reloc = reloc; 1874848b8605Smrg } 1875848b8605Smrg 1876b8e80941Smrg radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); 1877848b8605Smrg radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 1878848b8605Smrg radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 1879848b8605Smrg radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 1880848b8605Smrg radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 1881848b8605Smrg radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 1882848b8605Smrg radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 1883848b8605Smrg radeon_emit(cs, cb->cb_color_dim); /* R_028C78_CB_COLOR0_DIM */ 1884848b8605Smrg radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 1885848b8605Smrg radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 1886848b8605Smrg radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 1887848b8605Smrg radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 1888848b8605Smrg radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 1889848b8605Smrg radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 1890848b8605Smrg 1891848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */ 1892848b8605Smrg radeon_emit(cs, reloc); 1893848b8605Smrg 1894848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */ 
1895848b8605Smrg radeon_emit(cs, reloc); 1896848b8605Smrg 1897848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C7C_CB_COLOR0_CMASK */ 1898848b8605Smrg radeon_emit(cs, cmask_reloc); 1899848b8605Smrg 1900848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C84_CB_COLOR0_FMASK */ 1901848b8605Smrg radeon_emit(cs, reloc); 1902848b8605Smrg } 1903848b8605Smrg /* set CB_COLOR1_INFO for possible dual-src blending */ 1904b8e80941Smrg if (rctx->framebuffer.dual_src_blend && i == 1 && state->cbufs[0]) { 1905b8e80941Smrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 1906848b8605Smrg cb->cb_color_info | tex->cb_color_info); 1907848b8605Smrg i++; 1908848b8605Smrg } 1909b8e80941Smrg i += util_bitcount(rctx->fragment_images.enabled_mask); 1910b8e80941Smrg i += util_bitcount(rctx->fragment_buffers.enabled_mask); 1911b8e80941Smrg for (; i < 8 ; i++) 1912b8e80941Smrg radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 1913b8e80941Smrg for (; i < 12; i++) 1914b8e80941Smrg radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0); 1915848b8605Smrg 1916848b8605Smrg /* ZS buffer. */ 1917848b8605Smrg if (state->zsbuf) { 1918848b8605Smrg struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 1919b8e80941Smrg unsigned reloc = radeon_add_to_buffer_list(&rctx->b, 1920b8e80941Smrg &rctx->b.gfx, 1921848b8605Smrg (struct r600_resource*)state->zsbuf->texture, 1922848b8605Smrg RADEON_USAGE_READWRITE, 1923848b8605Smrg zb->base.texture->nr_samples > 1 ? 
1924848b8605Smrg RADEON_PRIO_DEPTH_BUFFER_MSAA : 1925848b8605Smrg RADEON_PRIO_DEPTH_BUFFER); 1926848b8605Smrg 1927b8e80941Smrg radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 1928848b8605Smrg 1929b8e80941Smrg radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 8); 1930848b8605Smrg radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */ 1931848b8605Smrg radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 1932848b8605Smrg radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 1933848b8605Smrg radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 1934848b8605Smrg radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 1935848b8605Smrg radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 1936848b8605Smrg radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 1937848b8605Smrg radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 1938848b8605Smrg 1939848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028048_DB_Z_READ_BASE */ 1940848b8605Smrg radeon_emit(cs, reloc); 1941848b8605Smrg 1942848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_02804C_DB_STENCIL_READ_BASE */ 1943848b8605Smrg radeon_emit(cs, reloc); 1944848b8605Smrg 1945848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028050_DB_Z_WRITE_BASE */ 1946848b8605Smrg radeon_emit(cs, reloc); 1947848b8605Smrg 1948848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028054_DB_STENCIL_WRITE_BASE */ 1949848b8605Smrg radeon_emit(cs, reloc); 1950848b8605Smrg } else if (rctx->screen->b.info.drm_minor >= 18) { 1951848b8605Smrg /* DRM 2.6.18 allows the INVALID format to disable depth/stencil. 1952848b8605Smrg * Older kernels are out of luck. 
*/ 1953b8e80941Smrg radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 1954848b8605Smrg radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 1955848b8605Smrg radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 1956848b8605Smrg } 1957848b8605Smrg 1958848b8605Smrg /* Framebuffer dimensions. */ 1959848b8605Smrg evergreen_get_scissor_rect(rctx, 0, 0, state->width, state->height, &tl, &br); 1960848b8605Smrg 1961b8e80941Smrg radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2); 1962848b8605Smrg radeon_emit(cs, tl); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */ 1963848b8605Smrg radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */ 1964848b8605Smrg 1965848b8605Smrg if (rctx->b.chip_class == EVERGREEN) { 1966b8e80941Smrg evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples); 1967848b8605Smrg } else { 1968b8e80941Smrg cayman_emit_msaa_state(cs, rctx->framebuffer.nr_samples, 1969b8e80941Smrg rctx->ps_iter_samples, 0); 1970848b8605Smrg } 1971848b8605Smrg} 1972848b8605Smrg 1973848b8605Smrgstatic void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) 1974848b8605Smrg{ 1975b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 1976848b8605Smrg struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; 1977848b8605Smrg float offset_units = state->offset_units; 1978848b8605Smrg float offset_scale = state->offset_scale; 1979b8e80941Smrg uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 1980848b8605Smrg 1981b8e80941Smrg if (!state->offset_units_unscaled) { 1982b8e80941Smrg switch (state->zs_format) { 1983b8e80941Smrg case PIPE_FORMAT_Z24X8_UNORM: 1984b8e80941Smrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1985b8e80941Smrg case PIPE_FORMAT_X8Z24_UNORM: 1986b8e80941Smrg case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1987b8e80941Smrg offset_units *= 2.0f; 1988b8e80941Smrg pa_su_poly_offset_db_fmt_cntl = 1989b8e80941Smrg 
S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24); 1990b8e80941Smrg break; 1991b8e80941Smrg case PIPE_FORMAT_Z16_UNORM: 1992b8e80941Smrg offset_units *= 4.0f; 1993b8e80941Smrg pa_su_poly_offset_db_fmt_cntl = 1994b8e80941Smrg S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16); 1995b8e80941Smrg break; 1996b8e80941Smrg default: 1997b8e80941Smrg pa_su_poly_offset_db_fmt_cntl = 1998b8e80941Smrg S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) | 1999b8e80941Smrg S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 2000b8e80941Smrg } 2001848b8605Smrg } 2002848b8605Smrg 2003b8e80941Smrg radeon_set_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4); 2004848b8605Smrg radeon_emit(cs, fui(offset_scale)); 2005848b8605Smrg radeon_emit(cs, fui(offset_units)); 2006848b8605Smrg radeon_emit(cs, fui(offset_scale)); 2007848b8605Smrg radeon_emit(cs, fui(offset_units)); 2008b8e80941Smrg 2009b8e80941Smrg radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2010b8e80941Smrg pa_su_poly_offset_db_fmt_cntl); 2011b8e80941Smrg} 2012b8e80941Smrg 2013b8e80941Smrguint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a, 2014b8e80941Smrg unsigned nr_cbufs) 2015b8e80941Smrg{ 2016b8e80941Smrg unsigned base_mask = 0; 2017b8e80941Smrg unsigned dirty_mask = a->image_rat_enabled_mask; 2018b8e80941Smrg while (dirty_mask) { 2019b8e80941Smrg unsigned idx = u_bit_scan(&dirty_mask); 2020b8e80941Smrg base_mask |= (0xf << (idx * 4)); 2021b8e80941Smrg } 2022b8e80941Smrg unsigned offset = util_last_bit(a->image_rat_enabled_mask); 2023b8e80941Smrg dirty_mask = a->buffer_rat_enabled_mask; 2024b8e80941Smrg while (dirty_mask) { 2025b8e80941Smrg unsigned idx = u_bit_scan(&dirty_mask); 2026b8e80941Smrg base_mask |= (0xf << (idx + offset) * 4); 2027b8e80941Smrg } 2028b8e80941Smrg return base_mask << (nr_cbufs * 4); 2029848b8605Smrg} 2030848b8605Smrg 2031848b8605Smrgstatic void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) 2032848b8605Smrg{ 
	/* (tail of evergreen_emit_cb_misc_state; function header is above this chunk)
	 * Emits CB_TARGET_MASK (blend colormask restricted to bound cbufs, plus
	 * RAT image slots) and CB_SHADER_MASK. */
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
	unsigned fb_colormask = a->bound_cbufs_target_mask;
	unsigned ps_colormask = a->ps_color_export_mask;
	unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, a->nr_cbufs);
	radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
	radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */
	/* This must match the used export instructions exactly.
	 * Other values may lead to undefined behavior and hangs.
	 */
	radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */
}

/* Emit HTILE (hierarchical Z) depth-buffer state: clear value, HTILE surface
 * config, preload control and HTILE base address when a depth surface with
 * HTILE is bound; otherwise disable HTILE. */
static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	struct r600_db_state *a = (struct r600_db_state*)atom;

	if (a->rsurf && a->rsurf->db_htile_surface) {
		struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
		unsigned reloc_idx;

		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
		/* NOP + relocation index so the kernel CS parser patches the
		 * HTILE base address and tracks the buffer. */
		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource,
						      RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, reloc_idx);
	} else {
		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
	}
}

/* Emit DB_RENDER_CONTROL / DB_COUNT_CONTROL / DB_RENDER_OVERRIDE /
 * DB_SHADER_CONTROL derived from occlusion-query state, in-place depth and
 * stencil flushes, depth->CB copies and HTILE clears. */
static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
	unsigned db_render_control = 0;
	unsigned db_count_control = 0;
	unsigned db_render_override =
		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);

	if (rctx->b.num_occlusion_queries > 0 &&
	    !a->occlusion_queries_disabled) {
		/* Occlusion queries active: count every passing sample. */
		db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
		if (rctx->b.chip_class == CAYMAN) {
			db_count_control |= S_028004_SAMPLE_RATE(a->log_samples);
		}
		db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
	} else {
		/* No queries: skip ZPASS counter updates entirely. */
		db_count_control |= S_028004_ZPASS_INCREMENT_DISABLE(1);
	}

	/* This is to fix a lockup when hyperz and alpha test are enabled at
	 * the same time somehow GPU get confuse on which order to pick for
	 * z test
	 */
	if (rctx->alphatest_state.sx_alpha_test_control)
		db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1);

	if (a->flush_depthstencil_through_cb) {
		/* Decompress by copying depth/stencil through the CB. */
		assert(a->copy_depth || a->copy_stencil);

		db_render_control |= S_028000_DEPTH_COPY_ENABLE(a->copy_depth) |
				     S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
				     S_028000_COPY_CENTROID(1) |
				     S_028000_COPY_SAMPLE(a->copy_sample);
	} else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
		/* In-place decompression pass. */
		db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
				     S_028000_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
		db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
	}
	if (a->htile_clear) {
		/* FIXME we might want to disable cliprect here */
		db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
	}

	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
	radeon_emit(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */
	radeon_emit(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL */
	radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
}

/* Emit SET_RESOURCE packets (8 dwords each) for every dirty vertex buffer,
 * followed by a NOP+reloc so the kernel tracks the buffer.
 * resource_offset selects the per-stage fetch-constant slot base;
 * pkt_flags is 0 for gfx or RADEON_CP_PACKET3_COMPUTE_MODE for compute. */
static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
					  struct r600_vertexbuf_state *state,
					  unsigned resource_offset,
					  unsigned pkt_flags)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	uint32_t dirty_mask = state->dirty_mask;

	while (dirty_mask) {
		struct pipe_vertex_buffer *vb;
		struct r600_resource *rbuffer;
		uint64_t va;
		unsigned buffer_index = u_bit_scan(&dirty_mask);

		vb = &state->vb[buffer_index];
		rbuffer = (struct r600_resource*)vb->buffer.resource;
		assert(rbuffer);

		va = rbuffer->gpu_address + vb->buffer_offset;

		/* fetch resources start at index 992 */
		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
		radeon_emit(cs, (resource_offset + buffer_index) * 8);
		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
		radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
		radeon_emit(cs, /* RESOURCEi_WORD2 */
			    S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
			    S_030008_STRIDE(vb->stride) |
			    S_030008_BASE_ADDRESS_HI(va >> 32UL));
		radeon_emit(cs, /* RESOURCEi_WORD3 */
			    S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
			    S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
			    S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
			    S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
		radeon_emit(cs, 0); /* RESOURCEi_WORD4 */
		radeon_emit(cs, 0); /* RESOURCEi_WORD5 */
		radeon_emit(cs, 0); /* RESOURCEi_WORD6 */
		radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
							  RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
	}
	state->dirty_mask = 0;
}

/* Graphics (FS pipeline) vertex buffers. */
static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
{
	evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_FS, 0);
}

/* Compute vertex buffers: same packets, compute-mode flag set. */
static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
{
	evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_CS,
				      RADEON_CP_PACKET3_COMPUTE_MODE);
}

/* Emit all dirty constant buffers for one shader stage.
 * For HW const buffers (index < R600_MAX_HW_CONST_BUFFERS) the ALU const
 * cache registers are programmed; every buffer additionally gets a
 * SET_RESOURCE fetch-constant view so shaders can also read it as a buffer.
 * Each GPU-address write is followed by NOP+reloc for kernel CS patching. */
static void evergreen_emit_constant_buffers(struct r600_context *rctx,
					    struct r600_constbuf_state *state,
					    unsigned buffer_id_base,
					    unsigned reg_alu_constbuf_size,
					    unsigned reg_alu_const_cache,
					    unsigned pkt_flags)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	uint32_t dirty_mask = state->dirty_mask;

	while (dirty_mask) {
		struct pipe_constant_buffer *cb;
		struct r600_resource *rbuffer;
		uint64_t va;
		unsigned buffer_index = ffs(dirty_mask) - 1;
		/* The GS ring const buffer gets special stride/cache settings below. */
		unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER);

		cb = &state->cb[buffer_index];
		rbuffer = (struct r600_resource*)cb->buffer;
		assert(rbuffer);

		va = rbuffer->gpu_address + cb->buffer_offset;

		if (buffer_index < R600_MAX_HW_CONST_BUFFERS) {
			/* ALU const cache: size in 256-byte units, base in 256-byte units. */
			radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
						    DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
			radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
						    pkt_flags);
			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
			radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
								  RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
		}

		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
		radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
		radeon_emit(cs, cb->buffer_size -1); /* RESOURCEi_WORD1 */
		radeon_emit(cs, /* RESOURCEi_WORD2 */
			    S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
			    S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
			    S_030008_BASE_ADDRESS_HI(va >> 32UL) |
			    S_030008_DATA_FORMAT(FMT_32_32_32_32_FLOAT));
		radeon_emit(cs, /* RESOURCEi_WORD3 */
			    S_03000C_UNCACHED(gs_ring_buffer ? 1 : 0) |
			    S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
			    S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
			    S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
			    S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
		radeon_emit(cs, 0); /* RESOURCEi_WORD4 */
		radeon_emit(cs, 0); /* RESOURCEi_WORD5 */
		radeon_emit(cs, 0); /* RESOURCEi_WORD6 */
		radeon_emit(cs, /* RESOURCEi_WORD7 */
			    S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
							  RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));

		dirty_mask &= ~(1 << buffer_index);
	}
	state->dirty_mask = 0;
}

/* VS constants can be in VS/ES (same space) or LS if tess is enabled */
static void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
	if (rctx->vs_shader->current->shader.vs_as_ls) {
		evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
						EG_FETCH_CONSTANTS_OFFSET_LS,
						R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
						R_028F40_ALU_CONST_CACHE_LS_0,
						0 /* PKT3 flags */);
	} else {
		evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
						EG_FETCH_CONSTANTS_OFFSET_VS,
						R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
						R_028980_ALU_CONST_CACHE_VS_0,
						0 /* PKT3 flags */);
	}
}

static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
					EG_FETCH_CONSTANTS_OFFSET_GS,
					R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
					R_0289C0_ALU_CONST_CACHE_GS_0,
					0 /* PKT3 flags */);
}

static void evergreen_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
					EG_FETCH_CONSTANTS_OFFSET_PS,
					R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
					R_028940_ALU_CONST_CACHE_PS_0,
					0 /* PKT3 flags */);
}

/* Compute shares the LS const-cache registers, with the compute packet flag. */
static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE],
					EG_FETCH_CONSTANTS_OFFSET_CS,
					R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
					R_028F40_ALU_CONST_CACHE_LS_0,
					RADEON_CP_PACKET3_COMPUTE_MODE);
}

/* tes constants can be emitted to VS or ES - which are common */
static void evergreen_emit_tes_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
	if (!rctx->tes_shader)
		return;
	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_EVAL],
					EG_FETCH_CONSTANTS_OFFSET_VS,
					R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
					R_028980_ALU_CONST_CACHE_VS_0,
					0);
}

static void evergreen_emit_tcs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
	if (!rctx->tes_shader)
		return;
	evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_CTRL],
					EG_FETCH_CONSTANTS_OFFSET_HS,
					R_028F80_ALU_CONST_BUFFER_SIZE_HS_0,
					R_028F00_ALU_CONST_CACHE_HS_0,
					0);
}

/* Set up per-stage scratch (temp register spill) rings for any shader stage
 * that reported scratch_space_needed. */
void evergreen_setup_scratch_buffers(struct r600_context *rctx) {
	static const struct {
		unsigned ring_base;
		unsigned item_size;
		unsigned ring_size;
	} regs[EG_NUM_HW_STAGES] = {
		[R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE },
		[R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE },
		[R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE },
		[R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE },
		[EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE },
		[EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE }
	};

	for (unsigned i = 0; i < EG_NUM_HW_STAGES; i++) {
		struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader;

		if (stage && unlikely(stage->scratch_space_needed)) {
			r600_setup_scratch_area_for_shader(rctx, stage,
				&rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size);
		}
	}
}

/* Emit SET_RESOURCE packets for every dirty sampler view of one stage.
 * Each view emits its 8 precomputed tex_resource_words, a reloc for the
 * texture, and (unless skip_mip_address_reloc) a second reloc for the
 * mip address word. */
static void evergreen_emit_sampler_views(struct r600_context *rctx,
					 struct r600_samplerview_state *state,
					 unsigned resource_id_base, unsigned pkt_flags)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	uint32_t dirty_mask = state->dirty_mask;

	while (dirty_mask) {
		struct r600_pipe_sampler_view *rview;
		unsigned resource_index = u_bit_scan(&dirty_mask);
		unsigned reloc;

		rview = state->views[resource_index];
		assert(rview);

		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
		radeon_emit(cs, (resource_id_base + resource_index) * 8);
		radeon_emit_array(cs, rview->tex_resource_words, 8);

		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource,
						  RADEON_USAGE_READ,
						  r600_get_sampler_view_priority(rview->tex_resource));
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
		radeon_emit(cs, reloc);

		if (!rview->skip_mip_address_reloc) {
			/* Second reloc patches the mip address resource word. */
			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
			radeon_emit(cs, reloc);
		}
	}
	state->dirty_mask = 0;
}

/* VS sampler views live in the LS slots when the VS runs as LS (tess). */
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
	if (rctx->vs_shader->current->shader.vs_as_ls) {
		evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
					     EG_FETCH_CONSTANTS_OFFSET_LS + R600_MAX_CONST_BUFFERS, 0);
	} else {
		evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
					     EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
	}
}

static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
				     EG_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS, 0);
}

static void evergreen_emit_tcs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].views,
				     EG_FETCH_CONSTANTS_OFFSET_HS + R600_MAX_CONST_BUFFERS, 0);
}

/* TES sampler views share the VS slot space. */
static void evergreen_emit_tes_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
	if (!rctx->tes_shader)
		return;
	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views,
				     EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
}

static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
				     EG_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS, 0);
}

static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
				     EG_FETCH_CONSTANTS_OFFSET_CS + R600_MAX_CONST_BUFFERS, RADEON_CP_PACKET3_COMPUTE_MODE);
}

/* Convert a border color to the float form the hardware expects for the
 * view's format. Pure-integer (non depth/stencil) formats are normalized
 * per channel; X24S8/X32_S8X24 map stencil to [0,1] via /255; everything
 * else passes through unchanged.
 * NOTE(review): `1ul << cs` assumes unsigned long is 64-bit when cs can be
 * 32 — on an LP32 target a 32-bit channel would shift by the type width
 * (UB); confirm channel sizes reaching this path are < 32 on such targets. */
static void evergreen_convert_border_color(union pipe_color_union *in,
					   union pipe_color_union *out,
					   enum pipe_format format)
{
	if (util_format_is_pure_integer(format) &&
	    !util_format_is_depth_or_stencil(format)) {
		const struct util_format_description *d = util_format_description(format);

		for (int i = 0; i < d->nr_channels; ++i) {
			int cs = d->channel[i].size;
			if (d->channel[i].type == UTIL_FORMAT_TYPE_SIGNED)
				out->f[i] = (double)(in->i[i]) / ((1ul << (cs - 1)) - 1 );
			else if (d->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
				out->f[i] = (double)(in->ui[i]) / ((1ul << cs) - 1 );
			else
				out->f[i] = 0;
		}

	} else {
		switch (format) {
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
			out->f[0] = (double)(in->ui[0]) / 255.0;
			out->f[1] = out->f[2] = out->f[3] = 0.0f;
			break;
		default:
			memcpy(out->f, in->f, 4 * sizeof(float));
		}
	}
}

/* Emit SET_SAMPLER packets (3 dwords each) for every dirty sampler state of
 * one stage, plus the border color registers when the state uses one. */
static void evergreen_emit_sampler_states(struct r600_context *rctx,
					  struct r600_textures_info *texinfo,
					  unsigned resource_id_base,
					  unsigned border_index_reg,
					  unsigned pkt_flags)
{
struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 2447848b8605Smrg uint32_t dirty_mask = texinfo->states.dirty_mask; 2448b8e80941Smrg union pipe_color_union border_color = {{0,0,0,1}}; 2449b8e80941Smrg union pipe_color_union *border_color_ptr = &border_color; 2450848b8605Smrg 2451848b8605Smrg while (dirty_mask) { 2452848b8605Smrg struct r600_pipe_sampler_state *rstate; 2453848b8605Smrg unsigned i = u_bit_scan(&dirty_mask); 2454848b8605Smrg 2455848b8605Smrg rstate = texinfo->states.states[i]; 2456848b8605Smrg assert(rstate); 2457848b8605Smrg 2458b8e80941Smrg if (rstate->border_color_use) { 2459b8e80941Smrg struct r600_pipe_sampler_view *rview = texinfo->views.views[i]; 2460b8e80941Smrg if (rview) { 2461b8e80941Smrg evergreen_convert_border_color(&rstate->border_color, 2462b8e80941Smrg &border_color, rview->base.format); 2463b8e80941Smrg } else { 2464b8e80941Smrg border_color_ptr = &rstate->border_color; 2465b8e80941Smrg } 2466b8e80941Smrg } 2467b8e80941Smrg 2468b8e80941Smrg radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags); 2469848b8605Smrg radeon_emit(cs, (resource_id_base + i) * 3); 2470848b8605Smrg radeon_emit_array(cs, rstate->tex_sampler_words, 3); 2471848b8605Smrg 2472848b8605Smrg if (rstate->border_color_use) { 2473b8e80941Smrg radeon_set_config_reg_seq(cs, border_index_reg, 5); 2474848b8605Smrg radeon_emit(cs, i); 2475b8e80941Smrg radeon_emit_array(cs, border_color_ptr->ui, 4); 2476848b8605Smrg } 2477848b8605Smrg } 2478848b8605Smrg texinfo->states.dirty_mask = 0; 2479848b8605Smrg} 2480848b8605Smrg 2481848b8605Smrgstatic void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2482848b8605Smrg{ 2483b8e80941Smrg if (rctx->vs_shader->current->shader.vs_as_ls) { 2484b8e80941Smrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 72, 2485b8e80941Smrg R_00A450_TD_LS_SAMPLER0_BORDER_COLOR_INDEX, 0); 2486b8e80941Smrg } else { 2487b8e80941Smrg evergreen_emit_sampler_states(rctx, 
&rctx->samplers[PIPE_SHADER_VERTEX], 18, 2488b8e80941Smrg R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0); 2489b8e80941Smrg } 2490848b8605Smrg} 2491848b8605Smrg 2492848b8605Smrgstatic void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2493848b8605Smrg{ 2494b8e80941Smrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, 2495b8e80941Smrg R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0); 2496b8e80941Smrg} 2497b8e80941Smrg 2498b8e80941Smrgstatic void evergreen_emit_tcs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2499b8e80941Smrg{ 2500b8e80941Smrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL], 54, 2501b8e80941Smrg R_00A43C_TD_HS_SAMPLER0_BORDER_COLOR_INDEX, 0); 2502b8e80941Smrg} 2503b8e80941Smrg 2504b8e80941Smrgstatic void evergreen_emit_tes_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2505b8e80941Smrg{ 2506b8e80941Smrg if (!rctx->tes_shader) 2507b8e80941Smrg return; 2508b8e80941Smrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL], 18, 2509b8e80941Smrg R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0); 2510848b8605Smrg} 2511848b8605Smrg 2512848b8605Smrgstatic void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2513848b8605Smrg{ 2514b8e80941Smrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, 2515b8e80941Smrg R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0); 2516b8e80941Smrg} 2517b8e80941Smrg 2518b8e80941Smrgstatic void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 2519b8e80941Smrg{ 2520b8e80941Smrg evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90, 2521b8e80941Smrg R_00A464_TD_CS_SAMPLER0_BORDER_INDEX, 2522b8e80941Smrg RADEON_CP_PACKET3_COMPUTE_MODE); 2523848b8605Smrg} 2524848b8605Smrg 2525848b8605Smrgstatic void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) 2526848b8605Smrg{ 
2527848b8605Smrg struct r600_sample_mask *s = (struct r600_sample_mask*)a; 2528848b8605Smrg uint8_t mask = s->sample_mask; 2529848b8605Smrg 2530b8e80941Smrg radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK, 2531848b8605Smrg mask | (mask << 8) | (mask << 16) | (mask << 24)); 2532848b8605Smrg} 2533848b8605Smrg 2534848b8605Smrgstatic void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) 2535848b8605Smrg{ 2536848b8605Smrg struct r600_sample_mask *s = (struct r600_sample_mask*)a; 2537b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 2538848b8605Smrg uint16_t mask = s->sample_mask; 2539848b8605Smrg 2540b8e80941Smrg radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 2541848b8605Smrg radeon_emit(cs, mask | (mask << 16)); /* X0Y0_X1Y0 */ 2542848b8605Smrg radeon_emit(cs, mask | (mask << 16)); /* X0Y1_X1Y1 */ 2543848b8605Smrg} 2544848b8605Smrg 2545848b8605Smrgstatic void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) 2546848b8605Smrg{ 2547b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 2548848b8605Smrg struct r600_cso_state *state = (struct r600_cso_state*)a; 2549848b8605Smrg struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; 2550848b8605Smrg 2551b8e80941Smrg if (!shader) 2552b8e80941Smrg return; 2553b8e80941Smrg 2554b8e80941Smrg radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS, 2555848b8605Smrg (shader->buffer->gpu_address + shader->offset) >> 8); 2556848b8605Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 2557b8e80941Smrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer, 2558b8e80941Smrg RADEON_USAGE_READ, 2559b8e80941Smrg RADEON_PRIO_SHADER_BINARY)); 2560848b8605Smrg} 2561848b8605Smrg 2562848b8605Smrgstatic void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) 2563848b8605Smrg{ 2564b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 2565848b8605Smrg struct r600_shader_stages_state 
					*state = (struct r600_shader_stages_state*)a;

	uint32_t v = 0, v2 = 0, primid = 0, tf_param = 0;

	/* VS-as-GS-A mode (streamout of GS inputs) uses GS scenario A. */
	if (rctx->vs_shader->current->shader.vs_as_gs_a) {
		v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
		primid = 1;
	}

	if (state->geom_enable) {
		uint32_t cut_val;

		/* Pick the smallest cut mode that covers the GS max output count. */
		if (rctx->gs_shader->gs_max_out_vertices <= 128)
			cut_val = V_028A40_GS_CUT_128;
		else if (rctx->gs_shader->gs_max_out_vertices <= 256)
			cut_val = V_028A40_GS_CUT_256;
		else if (rctx->gs_shader->gs_max_out_vertices <= 512)
			cut_val = V_028A40_GS_CUT_512;
		else
			cut_val = V_028A40_GS_CUT_1024;

		v = S_028B54_GS_EN(1) |
		    S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
		/* With tess, the ES input to the GS comes from the DS instead. */
		if (!rctx->tes_shader)
			v |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);

		v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
		     S_028A40_CUT_MODE(cut_val);

		if (rctx->gs_shader->current->shader.gs_prim_id_input)
			primid = 1;
	}

	if (rctx->tes_shader) {
		uint32_t type, partitioning, topology;
		struct tgsi_shader_info *info = &rctx->tes_shader->current->selector->info;
		unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
		unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
		bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
		bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
		switch (tes_prim_mode) {
		case PIPE_PRIM_LINES:
			type = V_028B6C_TESS_ISOLINE;
			break;
		case PIPE_PRIM_TRIANGLES:
			type = V_028B6C_TESS_TRIANGLE;
			break;
		case PIPE_PRIM_QUADS:
			type = V_028B6C_TESS_QUAD;
			break;
		default:
			assert(0);
			return;
		}

		switch (tes_spacing) {
		case PIPE_TESS_SPACING_FRACTIONAL_ODD:
			partitioning = V_028B6C_PART_FRAC_ODD;
			break;
		case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
			partitioning = V_028B6C_PART_FRAC_EVEN;
			break;
		case PIPE_TESS_SPACING_EQUAL:
			partitioning = V_028B6C_PART_INTEGER;
			break;
		default:
			assert(0);
			return;
		}

		if (tes_point_mode)
			topology = V_028B6C_OUTPUT_POINT;
		else if (tes_prim_mode == PIPE_PRIM_LINES)
			topology = V_028B6C_OUTPUT_LINE;
		else if (tes_vertex_order_cw)
			/* XXX follow radeonsi and invert */
			topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
		else
			topology = V_028B6C_OUTPUT_TRIANGLE_CW;

		tf_param = S_028B6C_TYPE(type) |
			   S_028B6C_PARTITIONING(partitioning) |
			   S_028B6C_TOPOLOGY(topology);
	}

	if (rctx->tes_shader) {
		/* Tess on: VS runs as LS, HS enabled; the DS feeds either the
		 * VS stage (no GS) or the ES stage (GS present). */
		v |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
		     S_028B54_HS_EN(1);
		if (!state->geom_enable)
			v |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
		else
			v |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
	}

	radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, v ? 1 : 0 );
	radeon_set_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
	radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
	radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
	radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, tf_param);
}

/* Program the ESGS/GSVS ring base+size registers. Each reprogramming is
 * bracketed by WAIT_3D_IDLE + VGT_FLUSH since config registers are not
 * context-buffered. */
static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
	struct r600_resource *rbuffer;

	radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));

	if (state->enable) {
		rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
		radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
				      rbuffer->gpu_address >> 8);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
							  RADEON_USAGE_READWRITE,
							  RADEON_PRIO_SHADER_RINGS));
		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
				      state->esgs_ring.buffer_size >> 8);

		rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
		radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
				      rbuffer->gpu_address >> 8);
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
							  RADEON_USAGE_READWRITE,
							  RADEON_PRIO_SHADER_RINGS));
		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
				      state->gsvs_ring.buffer_size >> 8);
	} else {
		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
	}

	radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
}

/* Store the chip-independent Cayman config/context defaults into the
 * start-of-CS command buffer. The chip_class/family/drm_minor parameters
 * are currently unused here. */
void cayman_init_common_regs(struct r600_command_buffer *cb,
			     enum chip_class ctx_chip_class,
			     enum radeon_family ctx_family,
			     int ctx_drm_minor)
{
	r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
	r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
	/* always set the temp clauses */
	r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */

	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */

	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));

	r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2);
	r600_store_value(cb, 0);
	r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf));

	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
}

static void cayman_init_atom_start_cs(struct r600_context *rctx)
/*
 * NOTE(review): body of cayman_init_atom_start_cs() (its header is just
 * above).  Builds the fixed start-of-CS preamble for Cayman into
 * rctx->start_cs_cmd (buffer sized 338 dwords):
 *  - CONTEXT_CONTROL first, then a PS_PARTIAL_FLUSH event (config writes
 *    follow), then PIPELINESTAT_START to enable pipeline-stat/streamout
 *    queries (comment in code: only blits disable them);
 *  - cayman_init_common_regs(), SPI config, and an SQ_STATIC_THREAD_MGMT
 *    sequence (0xffffffff/0xffffffff/0xfffffffe) that removes LS/HS from one
 *    SIMD as a hardware workaround (per the in-code comment);
 *  - zeroed ring itemsizes, VGT output-path/HOS/group defaults (HOS max tess
 *    level 64.0f, min 0.0f, reuse depth 16), centroid priorities, GDS size,
 *    LDS alloc, max/min VTX index, scissor rects opened to 16384, per-stage
 *    SQ_PGM_RESOURCES_2 rounding mode, 16 zeroed ALU const-buffer sizes for
 *    each of PS/VS/GS/LS/HS, streamout draw-opaque offset when streamout is
 *    supported, VGT shader stages/TF_PARAM zeroed, and five loop constants
 *    (0x01000FFF).
 * Ends at the closing brace; the header of evergreen_init_common_regs()
 * begins at the end of this span and its body continues below.
 */
2731848b8605Smrg struct r600_command_buffer *cb = &rctx->start_cs_cmd; 2732b8e80941Smrg int i; 2733848b8605Smrg 2734b8e80941Smrg r600_init_command_buffer(cb, 338); 2735848b8605Smrg 2736848b8605Smrg /* This must be first. */ 2737848b8605Smrg r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 2738848b8605Smrg r600_store_value(cb, 0x80000000); 2739848b8605Smrg r600_store_value(cb, 0x80000000); 2740848b8605Smrg 2741848b8605Smrg /* We're setting config registers here. */ 2742848b8605Smrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2743848b8605Smrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 2744848b8605Smrg 2745b8e80941Smrg /* This enables pipeline stat & streamout queries. 2746b8e80941Smrg * They are only disabled by blits. 2747b8e80941Smrg */ 2748b8e80941Smrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2749b8e80941Smrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0)); 2750b8e80941Smrg 2751848b8605Smrg cayman_init_common_regs(cb, rctx->b.chip_class, 2752848b8605Smrg rctx->b.family, rctx->screen->b.info.drm_minor); 2753848b8605Smrg 2754848b8605Smrg r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0); 2755848b8605Smrg r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4)); 2756848b8605Smrg 2757b8e80941Smrg /* remove LS/HS from one SIMD for hw workaround */ 2758b8e80941Smrg r600_store_config_reg_seq(cb, R_008E20_SQ_STATIC_THREAD_MGMT1, 3); 2759b8e80941Smrg r600_store_value(cb, 0xffffffff); 2760b8e80941Smrg r600_store_value(cb, 0xffffffff); 2761b8e80941Smrg r600_store_value(cb, 0xfffffffe); 2762b8e80941Smrg 2763848b8605Smrg r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6); 2764848b8605Smrg r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */ 2765848b8605Smrg r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */ 2766848b8605Smrg r600_store_value(cb, 0); /* R_028908_SQ_ESTMP_RING_ITEMSIZE */ 2767848b8605Smrg
r600_store_value(cb, 0); /* R_02890C_SQ_GSTMP_RING_ITEMSIZE */ 2768848b8605Smrg r600_store_value(cb, 0); /* R_028910_SQ_VSTMP_RING_ITEMSIZE */ 2769848b8605Smrg r600_store_value(cb, 0); /* R_028914_SQ_PSTMP_RING_ITEMSIZE */ 2770848b8605Smrg 2771848b8605Smrg r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4); 2772848b8605Smrg r600_store_value(cb, 0); /* R_02891C_SQ_GS_VERT_ITEMSIZE */ 2773848b8605Smrg r600_store_value(cb, 0); /* R_028920_SQ_GS_VERT_ITEMSIZE_1 */ 2774848b8605Smrg r600_store_value(cb, 0); /* R_028924_SQ_GS_VERT_ITEMSIZE_2 */ 2775848b8605Smrg r600_store_value(cb, 0); /* R_028928_SQ_GS_VERT_ITEMSIZE_3 */ 2776848b8605Smrg 2777848b8605Smrg r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); 2778848b8605Smrg r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ 2779848b8605Smrg r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */ 2780b8e80941Smrg r600_store_value(cb, fui(64)); /* R_028A18_VGT_HOS_MAX_TESS_LEVEL */ 2781b8e80941Smrg r600_store_value(cb, fui(0)); /* R_028A1C_VGT_HOS_MIN_TESS_LEVEL */ 2782b8e80941Smrg r600_store_value(cb, 16); /* R_028A20_VGT_HOS_REUSE_DEPTH */ 2783848b8605Smrg r600_store_value(cb, 0); /* R_028A24_VGT_GROUP_PRIM_TYPE */ 2784848b8605Smrg r600_store_value(cb, 0); /* R_028A28_VGT_GROUP_FIRST_DECR */ 2785848b8605Smrg r600_store_value(cb, 0); /* R_028A2C_VGT_GROUP_DECR */ 2786848b8605Smrg r600_store_value(cb, 0); /* R_028A30_VGT_GROUP_VECT_0_CNTL */ 2787848b8605Smrg r600_store_value(cb, 0); /* R_028A34_VGT_GROUP_VECT_1_CNTL */ 2788848b8605Smrg r600_store_value(cb, 0); /* R_028A38_VGT_GROUP_VECT_0_FMT_CNTL */ 2789848b8605Smrg r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */ 2790848b8605Smrg r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */ 2791848b8605Smrg 2792848b8605Smrg r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0); 2793848b8605Smrg 2794848b8605Smrg r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1); 2795848b8605Smrg 2796848b8605Smrg
r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 2797848b8605Smrg r600_store_value(cb, 0x76543210); /* CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */ 2798848b8605Smrg r600_store_value(cb, 0xfedcba98); /* CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */ 2799848b8605Smrg 2800b8e80941Smrg r600_store_context_reg(cb, R_028724_GDS_ADDR_SIZE, 0x3fff); 2801b8e80941Smrg r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2); 2802b8e80941Smrg r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */ 2803848b8605Smrg r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */ 2804848b8605Smrg 2805848b8605Smrg r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0); 2806848b8605Smrg 2807848b8605Smrg r600_store_context_reg_seq(cb, R_028400_VGT_MAX_VTX_INDX, 2); 2808848b8605Smrg r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */ 2809848b8605Smrg r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */ 2810848b8605Smrg 2811848b8605Smrg r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); 2812848b8605Smrg 2813848b8605Smrg r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0); 2814848b8605Smrg 2815848b8605Smrg r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); 2816848b8605Smrg 2817848b8605Smrg r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3); 2818848b8605Smrg r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ 2819848b8605Smrg r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ 2820848b8605Smrg r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ 2821848b8605Smrg 2822848b8605Smrg r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); 2823848b8605Smrg r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 2824848b8605Smrg 2825848b8605Smrg r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 2826848b8605Smrg r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0); 2827848b8605Smrg 2828848b8605Smrg r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
2829848b8605Smrg r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */ 2830848b8605Smrg r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */ 2831848b8605Smrg 2832848b8605Smrg r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2); 2833848b8605Smrg r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */ 2834848b8605Smrg r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */ 2835848b8605Smrg 2836848b8605Smrg r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 2837848b8605Smrg r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 2838b8e80941Smrg r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 2839b8e80941Smrg r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 2840b8e80941Smrg r600_store_context_reg(cb, R_0288C0_SQ_PGM_RESOURCES_2_HS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 2841b8e80941Smrg r600_store_context_reg(cb, R_0288D8_SQ_PGM_RESOURCES_2_LS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 2842b8e80941Smrg 2843848b8605Smrg r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0); 2844848b8605Smrg 2845848b8605Smrg /* to avoid GPU doing any preloading of constant from random address */ 2846848b8605Smrg r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 16); 2847848b8605Smrg for (i = 0; i < 16; i++) 2848848b8605Smrg r600_store_value(cb, 0); 2849848b8605Smrg 2850848b8605Smrg r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 16); 2851848b8605Smrg for (i = 0; i < 16; i++) 2852848b8605Smrg r600_store_value(cb, 0); 2853848b8605Smrg 2854848b8605Smrg r600_store_context_reg_seq(cb, R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 16); 2855848b8605Smrg for (i = 0; i < 16; i++)
2856848b8605Smrg r600_store_value(cb, 0); 2857848b8605Smrg 2858848b8605Smrg r600_store_context_reg_seq(cb, R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 16); 2859848b8605Smrg for (i = 0; i < 16; i++) 2860848b8605Smrg r600_store_value(cb, 0); 2861848b8605Smrg 2862848b8605Smrg r600_store_context_reg_seq(cb, R_028F80_ALU_CONST_BUFFER_SIZE_HS_0, 16); 2863848b8605Smrg for (i = 0; i < 16; i++) 2864848b8605Smrg r600_store_value(cb, 0); 2865848b8605Smrg 2866848b8605Smrg if (rctx->screen->b.has_streamout) { 2867848b8605Smrg r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 2868848b8605Smrg } 2869848b8605Smrg 2870848b8605Smrg r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0); 2871848b8605Smrg r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 2872848b8605Smrg r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); 2873848b8605Smrg r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2); 2874848b8605Smrg r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */ 2875848b8605Smrg r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */ 2876848b8605Smrg 2877b8e80941Smrg r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 2); 2878b8e80941Smrg r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */ 2879b8e80941Smrg r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */ 2880b8e80941Smrg r600_store_context_reg(cb, R_028B6C_VGT_TF_PARAM, 0); 2881848b8605Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF); 2882848b8605Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF); 2883848b8605Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (64 * 4), 0x01000FFF); 2884b8e80941Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (96 * 4), 0x01000FFF); 2885b8e80941Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF); 2886848b8605Smrg} 2887848b8605Smrg 2888b8e80941Smrgvoid evergreen_init_common_regs(struct r600_context *rctx, struct
/*
 * NOTE(review): continuation of evergreen_init_common_regs() (the `struct`
 * keyword of its second parameter ends the previous span; a comment here sits
 * legally between two tokens).  The visible body:
 *  - assigns fixed shader arbitration priorities (ps=0, vs=1, gs=2,
 *    es/hs/ls=3, cs=0) and per-stage default GPR counts
 *    (PS=93, VS=46, GS=ES=31, HS=LS=23, plus 4 clause-temp GPRs);
 *  - builds SQ_CONFIG: VC_ENABLE is left off for Cedar/Palm/Sumo/Sumo2/
 *    Caicos (presumably parts without a vertex cache -- TODO confirm),
 *    always sets EXPORT_SRC_C and the priority fields;
 *  - zeroes the global GPR resource mgmt registers, stores
 *    DB_DEPTH_CONTROL = 0 (in-code comment: the CS checker requires it),
 *    and the SX_MISC pair.  ctx_chip_class/ctx_drm_minor are unused in the
 *    visible body.
 * Then evergreen_init_atom_start_cs() begins: it forwards to
 * cayman_init_atom_start_cs() when chip_class == CAYMAN, otherwise declares
 * per-stage thread/stack-entry locals and starts the 338-dword preamble
 * (CONTEXT_CONTROL first).
 */
r600_command_buffer *cb, 2889b8e80941Smrg enum chip_class ctx_chip_class, 2890b8e80941Smrg enum radeon_family ctx_family, 2891b8e80941Smrg int ctx_drm_minor) 2892848b8605Smrg{ 2893848b8605Smrg int ps_prio; 2894848b8605Smrg int vs_prio; 2895848b8605Smrg int gs_prio; 2896848b8605Smrg int es_prio; 2897848b8605Smrg 2898848b8605Smrg int hs_prio; 2899848b8605Smrg int cs_prio; 2900848b8605Smrg int ls_prio; 2901848b8605Smrg 2902848b8605Smrg unsigned tmp; 2903848b8605Smrg 2904848b8605Smrg ps_prio = 0; 2905848b8605Smrg vs_prio = 1; 2906848b8605Smrg gs_prio = 2; 2907848b8605Smrg es_prio = 3; 2908b8e80941Smrg hs_prio = 3; 2909b8e80941Smrg ls_prio = 3; 2910848b8605Smrg cs_prio = 0; 2911848b8605Smrg 2912b8e80941Smrg rctx->default_gprs[R600_HW_STAGE_PS] = 93; 2913b8e80941Smrg rctx->default_gprs[R600_HW_STAGE_VS] = 46; 2914b8e80941Smrg rctx->r6xx_num_clause_temp_gprs = 4; 2915b8e80941Smrg rctx->default_gprs[R600_HW_STAGE_GS] = 31; 2916b8e80941Smrg rctx->default_gprs[R600_HW_STAGE_ES] = 31; 2917b8e80941Smrg rctx->default_gprs[EG_HW_STAGE_HS] = 23; 2918b8e80941Smrg rctx->default_gprs[EG_HW_STAGE_LS] = 23; 2919848b8605Smrg 2920848b8605Smrg tmp = 0; 2921848b8605Smrg switch (ctx_family) { 2922848b8605Smrg case CHIP_CEDAR: 2923848b8605Smrg case CHIP_PALM: 2924848b8605Smrg case CHIP_SUMO: 2925848b8605Smrg case CHIP_SUMO2: 2926848b8605Smrg case CHIP_CAICOS: 2927848b8605Smrg break; 2928848b8605Smrg default: 2929848b8605Smrg tmp |= S_008C00_VC_ENABLE(1); 2930848b8605Smrg break; 2931848b8605Smrg } 2932848b8605Smrg tmp |= S_008C00_EXPORT_SRC_C(1); 2933848b8605Smrg tmp |= S_008C00_CS_PRIO(cs_prio); 2934848b8605Smrg tmp |= S_008C00_LS_PRIO(ls_prio); 2935848b8605Smrg tmp |= S_008C00_HS_PRIO(hs_prio); 2936848b8605Smrg tmp |= S_008C00_PS_PRIO(ps_prio); 2937848b8605Smrg tmp |= S_008C00_VS_PRIO(vs_prio); 2938848b8605Smrg tmp |= S_008C00_GS_PRIO(gs_prio); 2939848b8605Smrg tmp |= S_008C00_ES_PRIO(es_prio); 2940848b8605Smrg 2941b8e80941Smrg r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1);
2942b8e80941Smrg r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */ 2943848b8605Smrg 2944b8e80941Smrg r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); 2945b8e80941Smrg r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ 2946b8e80941Smrg r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ 2947848b8605Smrg 2948848b8605Smrg /* The cs checker requires this register to be set. */ 2949848b8605Smrg r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0); 2950848b8605Smrg 2951848b8605Smrg r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2); 2952848b8605Smrg r600_store_value(cb, 0); 2953848b8605Smrg r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf)); 2954848b8605Smrg 2955848b8605Smrg return; 2956848b8605Smrg} 2957848b8605Smrg 2958848b8605Smrgvoid evergreen_init_atom_start_cs(struct r600_context *rctx) 2959848b8605Smrg{ 2960848b8605Smrg struct r600_command_buffer *cb = &rctx->start_cs_cmd; 2961848b8605Smrg int num_ps_threads; 2962848b8605Smrg int num_vs_threads; 2963848b8605Smrg int num_gs_threads; 2964848b8605Smrg int num_es_threads; 2965848b8605Smrg int num_hs_threads; 2966848b8605Smrg int num_ls_threads; 2967848b8605Smrg 2968848b8605Smrg int num_ps_stack_entries; 2969848b8605Smrg int num_vs_stack_entries; 2970848b8605Smrg int num_gs_stack_entries; 2971848b8605Smrg int num_es_stack_entries; 2972848b8605Smrg int num_hs_stack_entries; 2973848b8605Smrg int num_ls_stack_entries; 2974848b8605Smrg enum radeon_family family; 2975848b8605Smrg unsigned tmp, i; 2976848b8605Smrg 2977848b8605Smrg if (rctx->b.chip_class == CAYMAN) { 2978848b8605Smrg cayman_init_atom_start_cs(rctx); 2979848b8605Smrg return; 2980848b8605Smrg } 2981848b8605Smrg 2982b8e80941Smrg r600_init_command_buffer(cb, 338); 2983848b8605Smrg 2984848b8605Smrg /* This must be first.
*/ 2985848b8605Smrg r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 2986848b8605Smrg r600_store_value(cb, 0x80000000); 2987848b8605Smrg r600_store_value(cb, 0x80000000); 2988848b8605Smrg 2989848b8605Smrg /* We're setting config registers here. */ 2990848b8605Smrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2991848b8605Smrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 2992848b8605Smrg 2993b8e80941Smrg /* This enables pipeline stat & streamout queries. 2994b8e80941Smrg * They are only disabled by blits. 2995b8e80941Smrg */ 2996b8e80941Smrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2997b8e80941Smrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0)); 2998b8e80941Smrg 2999b8e80941Smrg evergreen_init_common_regs(rctx, cb, rctx->b.chip_class, 3000848b8605Smrg rctx->b.family, rctx->screen->b.info.drm_minor); 3001848b8605Smrg 3002848b8605Smrg family = rctx->b.family; 3003848b8605Smrg switch (family) { 3004848b8605Smrg case CHIP_CEDAR: 3005848b8605Smrg default: 3006848b8605Smrg num_ps_threads = 96; 3007848b8605Smrg num_vs_threads = 16; 3008848b8605Smrg num_gs_threads = 16; 3009848b8605Smrg num_es_threads = 16; 3010848b8605Smrg num_hs_threads = 16; 3011848b8605Smrg num_ls_threads = 16; 3012848b8605Smrg num_ps_stack_entries = 42; 3013848b8605Smrg num_vs_stack_entries = 42; 3014848b8605Smrg num_gs_stack_entries = 42; 3015848b8605Smrg num_es_stack_entries = 42; 3016848b8605Smrg num_hs_stack_entries = 42; 3017848b8605Smrg num_ls_stack_entries = 42; 3018848b8605Smrg break; 3019848b8605Smrg case CHIP_REDWOOD: 3020848b8605Smrg num_ps_threads = 128; 3021848b8605Smrg num_vs_threads = 20; 3022848b8605Smrg num_gs_threads = 20; 3023848b8605Smrg num_es_threads = 20; 3024848b8605Smrg num_hs_threads = 20; 3025848b8605Smrg num_ls_threads = 20; 3026848b8605Smrg num_ps_stack_entries = 42; 3027848b8605Smrg num_vs_stack_entries = 42; 3028848b8605Smrg num_gs_stack_entries = 42; 3029848b8605Smrg
/*
 * NOTE(review): remainder of evergreen_init_atom_start_cs() (this span opens
 * by closing the "This must be first." comment started at the end of the
 * previous span, so this note is placed at the first safe seam).  The switch
 * in progress selects per-family SQ thread counts and stack-entry counts
 * (Cedar/default, Redwood, Juniper, Cypress/Hemlock, Palm, Sumo, Sumo2,
 * Barts, Turks, Caicos); the values are then packed into
 * SQ_THREAD_RESOURCE_MGMT_1/2 and SQ_STACK_RESOURCE_MGMT_1/2/3.  After that:
 * LDS resource mgmt, the LS/HS-removed-from-one-SIMD workaround, SPI config,
 * zeroed ring itemsizes and VGT/HOS defaults (note: min tess level is
 * fui(1.0) here vs fui(0) in the Cayman variant -- intentional per-asic
 * difference, presumably; TODO confirm), scissors opened to 16384, per-stage
 * rounding modes, 16 zeroed ALU const-buffer sizes for PS/VS/GS/LS/HS,
 * streamout offset when supported, SQ_LDS_ALLOC, and VGT shader-stage /
 * tess registers -- Caicos writes only the 2-register form since it has no
 * tessellation regs, other parts write the full 7-register sequence -- then
 * five loop constants (0x01000FFF).  The function following the closing
 * brace, evergreen_update_ps_state(), continues past the end of this chunk
 * and is left untouched.
 */
num_es_stack_entries = 42; 3030848b8605Smrg num_hs_stack_entries = 42; 3031848b8605Smrg num_ls_stack_entries = 42; 3032848b8605Smrg break; 3033848b8605Smrg case CHIP_JUNIPER: 3034848b8605Smrg num_ps_threads = 128; 3035848b8605Smrg num_vs_threads = 20; 3036848b8605Smrg num_gs_threads = 20; 3037848b8605Smrg num_es_threads = 20; 3038848b8605Smrg num_hs_threads = 20; 3039848b8605Smrg num_ls_threads = 20; 3040848b8605Smrg num_ps_stack_entries = 85; 3041848b8605Smrg num_vs_stack_entries = 85; 3042848b8605Smrg num_gs_stack_entries = 85; 3043848b8605Smrg num_es_stack_entries = 85; 3044848b8605Smrg num_hs_stack_entries = 85; 3045848b8605Smrg num_ls_stack_entries = 85; 3046848b8605Smrg break; 3047848b8605Smrg case CHIP_CYPRESS: 3048848b8605Smrg case CHIP_HEMLOCK: 3049848b8605Smrg num_ps_threads = 128; 3050848b8605Smrg num_vs_threads = 20; 3051848b8605Smrg num_gs_threads = 20; 3052848b8605Smrg num_es_threads = 20; 3053848b8605Smrg num_hs_threads = 20; 3054848b8605Smrg num_ls_threads = 20; 3055848b8605Smrg num_ps_stack_entries = 85; 3056848b8605Smrg num_vs_stack_entries = 85; 3057848b8605Smrg num_gs_stack_entries = 85; 3058848b8605Smrg num_es_stack_entries = 85; 3059848b8605Smrg num_hs_stack_entries = 85; 3060848b8605Smrg num_ls_stack_entries = 85; 3061848b8605Smrg break; 3062848b8605Smrg case CHIP_PALM: 3063848b8605Smrg num_ps_threads = 96; 3064848b8605Smrg num_vs_threads = 16; 3065848b8605Smrg num_gs_threads = 16; 3066848b8605Smrg num_es_threads = 16; 3067848b8605Smrg num_hs_threads = 16; 3068848b8605Smrg num_ls_threads = 16; 3069848b8605Smrg num_ps_stack_entries = 42; 3070848b8605Smrg num_vs_stack_entries = 42; 3071848b8605Smrg num_gs_stack_entries = 42; 3072848b8605Smrg num_es_stack_entries = 42; 3073848b8605Smrg num_hs_stack_entries = 42; 3074848b8605Smrg num_ls_stack_entries = 42; 3075848b8605Smrg break; 3076848b8605Smrg case CHIP_SUMO: 3077848b8605Smrg num_ps_threads = 96; 3078848b8605Smrg num_vs_threads = 25; 3079848b8605Smrg num_gs_threads = 25; 3080848b8605Smrg
num_es_threads = 25; 3081b8e80941Smrg num_hs_threads = 16; 3082b8e80941Smrg num_ls_threads = 16; 3083848b8605Smrg num_ps_stack_entries = 42; 3084848b8605Smrg num_vs_stack_entries = 42; 3085848b8605Smrg num_gs_stack_entries = 42; 3086848b8605Smrg num_es_stack_entries = 42; 3087848b8605Smrg num_hs_stack_entries = 42; 3088848b8605Smrg num_ls_stack_entries = 42; 3089848b8605Smrg break; 3090848b8605Smrg case CHIP_SUMO2: 3091848b8605Smrg num_ps_threads = 96; 3092848b8605Smrg num_vs_threads = 25; 3093848b8605Smrg num_gs_threads = 25; 3094848b8605Smrg num_es_threads = 25; 3095b8e80941Smrg num_hs_threads = 16; 3096b8e80941Smrg num_ls_threads = 16; 3097848b8605Smrg num_ps_stack_entries = 85; 3098848b8605Smrg num_vs_stack_entries = 85; 3099848b8605Smrg num_gs_stack_entries = 85; 3100848b8605Smrg num_es_stack_entries = 85; 3101848b8605Smrg num_hs_stack_entries = 85; 3102848b8605Smrg num_ls_stack_entries = 85; 3103848b8605Smrg break; 3104848b8605Smrg case CHIP_BARTS: 3105848b8605Smrg num_ps_threads = 128; 3106848b8605Smrg num_vs_threads = 20; 3107848b8605Smrg num_gs_threads = 20; 3108848b8605Smrg num_es_threads = 20; 3109848b8605Smrg num_hs_threads = 20; 3110848b8605Smrg num_ls_threads = 20; 3111848b8605Smrg num_ps_stack_entries = 85; 3112848b8605Smrg num_vs_stack_entries = 85; 3113848b8605Smrg num_gs_stack_entries = 85; 3114848b8605Smrg num_es_stack_entries = 85; 3115848b8605Smrg num_hs_stack_entries = 85; 3116848b8605Smrg num_ls_stack_entries = 85; 3117848b8605Smrg break; 3118848b8605Smrg case CHIP_TURKS: 3119848b8605Smrg num_ps_threads = 128; 3120848b8605Smrg num_vs_threads = 20; 3121848b8605Smrg num_gs_threads = 20; 3122848b8605Smrg num_es_threads = 20; 3123848b8605Smrg num_hs_threads = 20; 3124848b8605Smrg num_ls_threads = 20; 3125848b8605Smrg num_ps_stack_entries = 42; 3126848b8605Smrg num_vs_stack_entries = 42; 3127848b8605Smrg num_gs_stack_entries = 42; 3128848b8605Smrg num_es_stack_entries = 42; 3129848b8605Smrg num_hs_stack_entries = 42; 3130848b8605Smrg
num_ls_stack_entries = 42; 3131848b8605Smrg break; 3132848b8605Smrg case CHIP_CAICOS: 3133b8e80941Smrg num_ps_threads = 96; 3134848b8605Smrg num_vs_threads = 10; 3135848b8605Smrg num_gs_threads = 10; 3136848b8605Smrg num_es_threads = 10; 3137848b8605Smrg num_hs_threads = 10; 3138848b8605Smrg num_ls_threads = 10; 3139848b8605Smrg num_ps_stack_entries = 42; 3140848b8605Smrg num_vs_stack_entries = 42; 3141848b8605Smrg num_gs_stack_entries = 42; 3142848b8605Smrg num_es_stack_entries = 42; 3143848b8605Smrg num_hs_stack_entries = 42; 3144848b8605Smrg num_ls_stack_entries = 42; 3145848b8605Smrg break; 3146848b8605Smrg } 3147848b8605Smrg 3148848b8605Smrg tmp = S_008C18_NUM_PS_THREADS(num_ps_threads); 3149848b8605Smrg tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads); 3150848b8605Smrg tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads); 3151848b8605Smrg tmp |= S_008C18_NUM_ES_THREADS(num_es_threads); 3152848b8605Smrg 3153848b8605Smrg r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5); 3154848b8605Smrg r600_store_value(cb, tmp); /* R_008C18_SQ_THREAD_RESOURCE_MGMT_1 */ 3155848b8605Smrg 3156848b8605Smrg tmp = S_008C1C_NUM_HS_THREADS(num_hs_threads); 3157848b8605Smrg tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads); 3158848b8605Smrg r600_store_value(cb, tmp); /* R_008C1C_SQ_THREAD_RESOURCE_MGMT_2 */ 3159848b8605Smrg 3160848b8605Smrg tmp = S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); 3161848b8605Smrg tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); 3162848b8605Smrg r600_store_value(cb, tmp); /* R_008C20_SQ_STACK_RESOURCE_MGMT_1 */ 3163848b8605Smrg 3164848b8605Smrg tmp = S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); 3165848b8605Smrg tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries); 3166848b8605Smrg r600_store_value(cb, tmp); /* R_008C24_SQ_STACK_RESOURCE_MGMT_2 */ 3167848b8605Smrg 3168848b8605Smrg tmp = S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries); 3169848b8605Smrg tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
3170848b8605Smrg r600_store_value(cb, tmp); /* R_008C28_SQ_STACK_RESOURCE_MGMT_3 */ 3171848b8605Smrg 3172848b8605Smrg r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT, 3173848b8605Smrg S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000)); 3174848b8605Smrg 3175b8e80941Smrg /* remove LS/HS from one SIMD for hw workaround */ 3176b8e80941Smrg r600_store_config_reg_seq(cb, R_008E20_SQ_STATIC_THREAD_MGMT1, 3); 3177b8e80941Smrg r600_store_value(cb, 0xffffffff); 3178b8e80941Smrg r600_store_value(cb, 0xffffffff); 3179b8e80941Smrg r600_store_value(cb, 0xfffffffe); 3180b8e80941Smrg 3181848b8605Smrg r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0); 3182848b8605Smrg r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4)); 3183848b8605Smrg 3184848b8605Smrg r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6); 3185848b8605Smrg r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */ 3186848b8605Smrg r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */ 3187848b8605Smrg r600_store_value(cb, 0); /* R_028908_SQ_ESTMP_RING_ITEMSIZE */ 3188848b8605Smrg r600_store_value(cb, 0); /* R_02890C_SQ_GSTMP_RING_ITEMSIZE */ 3189848b8605Smrg r600_store_value(cb, 0); /* R_028910_SQ_VSTMP_RING_ITEMSIZE */ 3190848b8605Smrg r600_store_value(cb, 0); /* R_028914_SQ_PSTMP_RING_ITEMSIZE */ 3191848b8605Smrg 3192848b8605Smrg r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4); 3193848b8605Smrg r600_store_value(cb, 0); /* R_02891C_SQ_GS_VERT_ITEMSIZE */ 3194848b8605Smrg r600_store_value(cb, 0); /* R_028920_SQ_GS_VERT_ITEMSIZE_1 */ 3195848b8605Smrg r600_store_value(cb, 0); /* R_028924_SQ_GS_VERT_ITEMSIZE_2 */ 3196848b8605Smrg r600_store_value(cb, 0); /* R_028928_SQ_GS_VERT_ITEMSIZE_3 */ 3197848b8605Smrg 3198848b8605Smrg r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); 3199848b8605Smrg r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ 3200848b8605Smrg r600_store_value(cb, 0); /*
R_028A14_VGT_HOS_CNTL */ 3201b8e80941Smrg r600_store_value(cb, fui(64)); /* R_028A18_VGT_HOS_MAX_TESS_LEVEL */ 3202b8e80941Smrg r600_store_value(cb, fui(1.0)); /* R_028A1C_VGT_HOS_MIN_TESS_LEVEL */ 3203b8e80941Smrg r600_store_value(cb, 16); /* R_028A20_VGT_HOS_REUSE_DEPTH */ 3204848b8605Smrg r600_store_value(cb, 0); /* R_028A24_VGT_GROUP_PRIM_TYPE */ 3205848b8605Smrg r600_store_value(cb, 0); /* R_028A28_VGT_GROUP_FIRST_DECR */ 3206848b8605Smrg r600_store_value(cb, 0); /* R_028A2C_VGT_GROUP_DECR */ 3207848b8605Smrg r600_store_value(cb, 0); /* R_028A30_VGT_GROUP_VECT_0_CNTL */ 3208848b8605Smrg r600_store_value(cb, 0); /* R_028A34_VGT_GROUP_VECT_1_CNTL */ 3209848b8605Smrg r600_store_value(cb, 0); /* R_028A38_VGT_GROUP_VECT_0_FMT_CNTL */ 3210848b8605Smrg r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */ 3211848b8605Smrg r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */ 3212848b8605Smrg 3213848b8605Smrg r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1); 3214848b8605Smrg 3215848b8605Smrg r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0); 3216848b8605Smrg 3217848b8605Smrg r600_store_context_reg_seq(cb, R_028400_VGT_MAX_VTX_INDX, 2); 3218848b8605Smrg r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */ 3219848b8605Smrg r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */ 3220848b8605Smrg 3221848b8605Smrg r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); 3222848b8605Smrg 3223848b8605Smrg r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0); 3224848b8605Smrg 3225848b8605Smrg r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); 3226848b8605Smrg r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3227848b8605Smrg r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 3228848b8605Smrg 3229848b8605Smrg r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); 3230848b8605Smrg r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0); 3231848b8605Smrg 3232848b8605Smrg
r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3); 3233848b8605Smrg r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */ 3234848b8605Smrg r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */ 3235848b8605Smrg r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */ 3236848b8605Smrg 3237848b8605Smrg r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2); 3238848b8605Smrg r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */ 3239848b8605Smrg r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */ 3240848b8605Smrg 3241848b8605Smrg r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2); 3242848b8605Smrg r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */ 3243848b8605Smrg r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */ 3244848b8605Smrg 3245848b8605Smrg r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3246848b8605Smrg r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3247b8e80941Smrg r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3248b8e80941Smrg r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3249848b8605Smrg r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0); 3250b8e80941Smrg r600_store_context_reg(cb, R_0288C0_SQ_PGM_RESOURCES_2_HS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3251b8e80941Smrg r600_store_context_reg(cb, R_0288D8_SQ_PGM_RESOURCES_2_LS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN)); 3252848b8605Smrg 3253848b8605Smrg /* to avoid GPU doing any preloading of constant from random address */ 3254848b8605Smrg r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 16); 3255848b8605Smrg for (i =
0; i < 16; i++) 3256848b8605Smrg r600_store_value(cb, 0); 3257848b8605Smrg 3258848b8605Smrg r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 16); 3259848b8605Smrg for (i = 0; i < 16; i++) 3260848b8605Smrg r600_store_value(cb, 0); 3261848b8605Smrg 3262848b8605Smrg r600_store_context_reg_seq(cb, R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 16); 3263848b8605Smrg for (i = 0; i < 16; i++) 3264848b8605Smrg r600_store_value(cb, 0); 3265848b8605Smrg 3266848b8605Smrg r600_store_context_reg_seq(cb, R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 16); 3267848b8605Smrg for (i = 0; i < 16; i++) 3268848b8605Smrg r600_store_value(cb, 0); 3269848b8605Smrg 3270848b8605Smrg r600_store_context_reg_seq(cb, R_028F80_ALU_CONST_BUFFER_SIZE_HS_0, 16); 3271848b8605Smrg for (i = 0; i < 16; i++) 3272848b8605Smrg r600_store_value(cb, 0); 3273848b8605Smrg 3274848b8605Smrg r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0); 3275848b8605Smrg 3276848b8605Smrg if (rctx->screen->b.has_streamout) { 3277848b8605Smrg r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3278848b8605Smrg } 3279848b8605Smrg 3280848b8605Smrg r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0); 3281848b8605Smrg r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 3282848b8605Smrg r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); 3283848b8605Smrg r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2); 3284848b8605Smrg r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */ 3285848b8605Smrg r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */ 3286b8e80941Smrg 3287b8e80941Smrg r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2); 3288b8e80941Smrg r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */ 3289b8e80941Smrg r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */ 3290b8e80941Smrg 3291b8e80941Smrg if (rctx->b.family == CHIP_CAICOS) { 3292b8e80941Smrg r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 2);
3293b8e80941Smrg r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */ 3294b8e80941Smrg r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */ 3295b8e80941Smrg r600_store_context_reg(cb, R_028B6C_VGT_TF_PARAM, 0); 3296b8e80941Smrg } else { 3297b8e80941Smrg r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 7); 3298b8e80941Smrg r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */ 3299b8e80941Smrg r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */ 3300b8e80941Smrg r600_store_value(cb, 0); /* R028B5C_VGT_LS_SIZE */ 3301b8e80941Smrg r600_store_value(cb, 0); /* R028B60_VGT_HS_SIZE */ 3302b8e80941Smrg r600_store_value(cb, 0); /* R028B64_VGT_LS_HS_ALLOC */ 3303b8e80941Smrg r600_store_value(cb, 0); /* R028B68_VGT_HS_PATCH_CONST */ 3304b8e80941Smrg r600_store_value(cb, 0); /* R028B68_VGT_TF_PARAM */ 3305b8e80941Smrg } 3306848b8605Smrg 3307848b8605Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF); 3308848b8605Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF); 3309848b8605Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (64 * 4), 0x01000FFF); 3310b8e80941Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (96 * 4), 0x01000FFF); 3311b8e80941Smrg eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF); 3312848b8605Smrg} 3313848b8605Smrg 3314848b8605Smrgvoid evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 3315848b8605Smrg{ 3316848b8605Smrg struct r600_context *rctx = (struct r600_context *)ctx; 3317848b8605Smrg struct r600_command_buffer *cb = &shader->command_buffer; 3318848b8605Smrg struct r600_shader *rshader = &shader->shader; 3319848b8605Smrg unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0; 3320b8e80941Smrg int pos_index = -1, face_index = -1, fixed_pt_position_index = -1; 3321848b8605Smrg int ninterp = 0; 3322b8e80941Smrg boolean have_perspective = FALSE, have_linear =
FALSE; 3323b8e80941Smrg static const unsigned spi_baryc_enable_bit[6] = { 3324b8e80941Smrg S_0286E0_PERSP_SAMPLE_ENA(1), 3325b8e80941Smrg S_0286E0_PERSP_CENTER_ENA(1), 3326b8e80941Smrg S_0286E0_PERSP_CENTROID_ENA(1), 3327b8e80941Smrg S_0286E0_LINEAR_SAMPLE_ENA(1), 3328b8e80941Smrg S_0286E0_LINEAR_CENTER_ENA(1), 3329b8e80941Smrg S_0286E0_LINEAR_CENTROID_ENA(1) 3330b8e80941Smrg }; 3331b8e80941Smrg unsigned spi_baryc_cntl = 0, sid, tmp, num = 0; 3332b8e80941Smrg unsigned z_export = 0, stencil_export = 0, mask_export = 0; 3333848b8605Smrg unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; 3334848b8605Smrg uint32_t spi_ps_input_cntl[32]; 3335848b8605Smrg 3336848b8605Smrg if (!cb->buf) { 3337848b8605Smrg r600_init_command_buffer(cb, 64); 3338848b8605Smrg } else { 3339848b8605Smrg cb->num_dw = 0; 3340848b8605Smrg } 3341848b8605Smrg 3342848b8605Smrg for (i = 0; i < rshader->ninput; i++) { 3343848b8605Smrg /* evergreen NUM_INTERP only contains values interpolated into the LDS, 3344848b8605Smrg POSITION goes via GPRs from the SC so isn't counted */ 3345848b8605Smrg if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 3346848b8605Smrg pos_index = i; 3347848b8605Smrg else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) { 3348848b8605Smrg if (face_index == -1) 3349848b8605Smrg face_index = i; 3350848b8605Smrg } 3351848b8605Smrg else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 3352848b8605Smrg if (face_index == -1) 3353848b8605Smrg face_index = i; /* lives in same register, same enable bit */ 3354848b8605Smrg } 3355b8e80941Smrg else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) { 3356b8e80941Smrg fixed_pt_position_index = i; 3357b8e80941Smrg } 3358848b8605Smrg else { 3359848b8605Smrg ninterp++; 3360b8e80941Smrg int k = eg_get_interpolator_index( 3361b8e80941Smrg rshader->input[i].interpolate, 3362b8e80941Smrg rshader->input[i].interpolate_location); 3363b8e80941Smrg if (k >= 0) { 3364b8e80941Smrg spi_baryc_cntl 
|= spi_baryc_enable_bit[k]; 3365b8e80941Smrg have_perspective |= k < 3; 3366b8e80941Smrg have_linear |= !(k < 3); 3367b8e80941Smrg } 3368848b8605Smrg } 3369848b8605Smrg 3370848b8605Smrg sid = rshader->input[i].spi_sid; 3371848b8605Smrg 3372848b8605Smrg if (sid) { 3373848b8605Smrg tmp = S_028644_SEMANTIC(sid); 3374848b8605Smrg 3375b8e80941Smrg /* D3D 9 behaviour. GL is undefined */ 3376b8e80941Smrg if (rshader->input[i].name == TGSI_SEMANTIC_COLOR && rshader->input[i].sid == 0) 3377b8e80941Smrg tmp |= S_028644_DEFAULT_VAL(3); 3378b8e80941Smrg 3379848b8605Smrg if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || 3380848b8605Smrg rshader->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || 3381848b8605Smrg (rshader->input[i].interpolate == TGSI_INTERPOLATE_COLOR && 3382848b8605Smrg rctx->rasterizer && rctx->rasterizer->flatshade)) { 3383848b8605Smrg tmp |= S_028644_FLAT_SHADE(1); 3384848b8605Smrg } 3385848b8605Smrg 3386848b8605Smrg if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && 3387848b8605Smrg (sprite_coord_enable & (1 << rshader->input[i].sid))) { 3388848b8605Smrg tmp |= S_028644_PT_SPRITE_TEX(1); 3389848b8605Smrg } 3390848b8605Smrg 3391848b8605Smrg spi_ps_input_cntl[num++] = tmp; 3392848b8605Smrg } 3393848b8605Smrg } 3394848b8605Smrg 3395848b8605Smrg r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, num); 3396848b8605Smrg r600_store_array(cb, num, spi_ps_input_cntl); 3397848b8605Smrg 3398848b8605Smrg for (i = 0; i < rshader->noutput; i++) { 3399848b8605Smrg if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 3400848b8605Smrg z_export = 1; 3401848b8605Smrg if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 3402848b8605Smrg stencil_export = 1; 3403b8e80941Smrg if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK && 3404b8e80941Smrg rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) 3405b8e80941Smrg mask_export = 1; 3406848b8605Smrg } 3407848b8605Smrg if (rshader->uses_kill) 3408848b8605Smrg db_shader_control |= 
S_02880C_KILL_ENABLE(1); 3409848b8605Smrg 3410848b8605Smrg db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export); 3411848b8605Smrg db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export); 3412b8e80941Smrg db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export); 3413b8e80941Smrg 3414b8e80941Smrg if (shader->selector->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) { 3415b8e80941Smrg db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) | 3416b8e80941Smrg S_02880C_EXEC_ON_NOOP(shader->selector->info.writes_memory); 3417b8e80941Smrg } else if (shader->selector->info.writes_memory) { 3418b8e80941Smrg db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1); 3419b8e80941Smrg } 3420b8e80941Smrg 3421b8e80941Smrg switch (rshader->ps_conservative_z) { 3422b8e80941Smrg default: /* fall through */ 3423b8e80941Smrg case TGSI_FS_DEPTH_LAYOUT_ANY: 3424b8e80941Smrg db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z); 3425b8e80941Smrg break; 3426b8e80941Smrg case TGSI_FS_DEPTH_LAYOUT_GREATER: 3427b8e80941Smrg db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); 3428b8e80941Smrg break; 3429b8e80941Smrg case TGSI_FS_DEPTH_LAYOUT_LESS: 3430b8e80941Smrg db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); 3431b8e80941Smrg break; 3432b8e80941Smrg } 3433848b8605Smrg 3434848b8605Smrg exports_ps = 0; 3435848b8605Smrg for (i = 0; i < rshader->noutput; i++) { 3436848b8605Smrg if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || 3437b8e80941Smrg rshader->output[i].name == TGSI_SEMANTIC_STENCIL || 3438b8e80941Smrg rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) 3439848b8605Smrg exports_ps |= 1; 3440848b8605Smrg } 3441848b8605Smrg 3442b8e80941Smrg num_cout = rshader->ps_export_highest + 1; 3443848b8605Smrg 3444848b8605Smrg exports_ps |= S_02884C_EXPORT_COLORS(num_cout); 3445848b8605Smrg if (!exports_ps) { 3446848b8605Smrg /* always at least export 1 component per pixel */ 
3447848b8605Smrg exports_ps = 2; 3448848b8605Smrg } 3449848b8605Smrg shader->nr_ps_color_outputs = num_cout; 3450b8e80941Smrg shader->ps_color_export_mask = rshader->ps_color_export_mask; 3451848b8605Smrg if (ninterp == 0) { 3452848b8605Smrg ninterp = 1; 3453848b8605Smrg have_perspective = TRUE; 3454848b8605Smrg } 3455b8e80941Smrg if (!spi_baryc_cntl) 3456b8e80941Smrg spi_baryc_cntl |= spi_baryc_enable_bit[0]; 3457848b8605Smrg 3458848b8605Smrg if (!have_perspective && !have_linear) 3459848b8605Smrg have_perspective = TRUE; 3460848b8605Smrg 3461848b8605Smrg spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | 3462848b8605Smrg S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | 3463848b8605Smrg S_0286CC_LINEAR_GRADIENT_ENA(have_linear); 3464848b8605Smrg spi_input_z = 0; 3465848b8605Smrg if (pos_index != -1) { 3466848b8605Smrg spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | 3467b8e80941Smrg S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) | 3468848b8605Smrg S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); 3469848b8605Smrg spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1); 3470848b8605Smrg } 3471848b8605Smrg 3472848b8605Smrg spi_ps_in_control_1 = 0; 3473848b8605Smrg if (face_index != -1) { 3474848b8605Smrg spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 3475848b8605Smrg S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 3476848b8605Smrg } 3477b8e80941Smrg if (fixed_pt_position_index != -1) { 3478b8e80941Smrg spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) | 3479b8e80941Smrg S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr); 3480b8e80941Smrg } 3481848b8605Smrg 3482848b8605Smrg r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2); 3483848b8605Smrg r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */ 3484848b8605Smrg r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */ 3485848b8605Smrg 3486848b8605Smrg 
r600_store_context_reg(cb, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); 3487848b8605Smrg r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z); 3488848b8605Smrg r600_store_context_reg(cb, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps); 3489848b8605Smrg 3490848b8605Smrg r600_store_context_reg_seq(cb, R_028840_SQ_PGM_START_PS, 2); 3491848b8605Smrg r600_store_value(cb, shader->bo->gpu_address >> 8); 3492848b8605Smrg r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */ 3493848b8605Smrg S_028844_NUM_GPRS(rshader->bc.ngpr) | 3494848b8605Smrg S_028844_PRIME_CACHE_ON_DRAW(1) | 3495b8e80941Smrg S_028844_DX10_CLAMP(1) | 3496848b8605Smrg S_028844_STACK_SIZE(rshader->bc.nstack)); 3497848b8605Smrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ 3498848b8605Smrg 3499848b8605Smrg shader->db_shader_control = db_shader_control; 3500b8e80941Smrg shader->ps_depth_export = z_export | stencil_export | mask_export; 3501848b8605Smrg 3502848b8605Smrg shader->sprite_coord_enable = sprite_coord_enable; 3503848b8605Smrg if (rctx->rasterizer) 3504848b8605Smrg shader->flatshade = rctx->rasterizer->flatshade; 3505848b8605Smrg} 3506848b8605Smrg 3507848b8605Smrgvoid evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 3508848b8605Smrg{ 3509848b8605Smrg struct r600_command_buffer *cb = &shader->command_buffer; 3510848b8605Smrg struct r600_shader *rshader = &shader->shader; 3511848b8605Smrg 3512848b8605Smrg r600_init_command_buffer(cb, 32); 3513848b8605Smrg 3514848b8605Smrg r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES, 3515848b8605Smrg S_028890_NUM_GPRS(rshader->bc.ngpr) | 3516b8e80941Smrg S_028890_DX10_CLAMP(1) | 3517848b8605Smrg S_028890_STACK_SIZE(rshader->bc.nstack)); 3518848b8605Smrg r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES, 3519848b8605Smrg shader->bo->gpu_address >> 8); 3520848b8605Smrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). 
	 */
}

/* Build the GS state command buffer: vertex-out limits, output primitive
 * type, instancing count, and the ESGS/GSVS ring item sizes and offsets.
 * Ring sizes come from the copy shader's per-stream item sizes; the >> 2
 * converts byte sizes to dword units (presumably — the hardware registers
 * take dwords; confirm against r600_shader ring_item_sizes producer). */
void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_command_buffer *cb = &shader->command_buffer;
	struct r600_shader *rshader = &shader->shader;
	struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
	/* Per-stream GSVS ring slice sizes: item size * max emitted vertices,
	 * converted from bytes to dwords. */
	unsigned gsvs_itemsizes[4] = {
			(cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2,
			(cp_shader->ring_item_sizes[1] * shader->selector->gs_max_out_vertices) >> 2,
			(cp_shader->ring_item_sizes[2] * shader->selector->gs_max_out_vertices) >> 2,
			(cp_shader->ring_item_sizes[3] * shader->selector->gs_max_out_vertices) >> 2
	};

	r600_init_command_buffer(cb, 64);

	/* VGT_GS_MODE is written by evergreen_emit_shader_stages */


	r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
			       S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices));
	r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
			       r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));

	/* GS instancing requires a sufficiently recent kernel DRM interface. */
	if (rctx->screen->b.info.drm_minor >= 35) {
		r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
				       S_028B90_CNT(MIN2(shader->selector->gs_num_invocations, 127)) |
				       S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
	}
	r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
	r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
	r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
	r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
	r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);

	r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
			       (rshader->ring_item_sizes[0]) >> 2);

	/* Total GSVS ring item size is the sum of all four stream slices. */
	r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
			       gsvs_itemsizes[0] +
			       gsvs_itemsizes[1] +
			       gsvs_itemsizes[2] +
			       gsvs_itemsizes[3]);

	/* Stream N starts where streams 0..N-1 end (prefix sums). */
	r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
	r600_store_value(cb, gsvs_itemsizes[0]);
	r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
	r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + gsvs_itemsizes[2]);

	/* FIXME calculate these values somehow ??? */
	r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
	r600_store_value(cb, 0x80); /* GS_PER_ES */
	r600_store_value(cb, 0x100); /* ES_PER_GS */
	r600_store_value(cb, 0x2); /* GS_PER_VS */

	r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
			       S_028878_NUM_GPRS(rshader->bc.ngpr) |
			       S_028878_DX10_CLAMP(1) |
			       S_028878_STACK_SIZE(rshader->bc.nstack));
	r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
			       shader->bo->gpu_address >> 8);
	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ.
	 */
}


/* Build the VS state command buffer: SPI output-id mapping, export count,
 * program resources/start address, viewport transform control and the
 * cached PA_CL_VS_OUT_CNTL value. */
void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	struct r600_command_buffer *cb = &shader->command_buffer;
	struct r600_shader *rshader = &shader->shader;
	unsigned spi_vs_out_id[10] = {};
	unsigned i, tmp, nparams = 0;

	/* Pack each output's spi_sid into SPI_VS_OUT_ID_*: 4 byte-sized
	 * semantic ids per 32-bit register. */
	for (i = 0; i < rshader->noutput; i++) {
		if (rshader->output[i].spi_sid) {
			tmp = rshader->output[i].spi_sid << ((nparams & 3) * 8);
			spi_vs_out_id[nparams / 4] |= tmp;
			nparams++;
		}
	}

	r600_init_command_buffer(cb, 32);

	r600_store_context_reg_seq(cb, R_02861C_SPI_VS_OUT_ID_0, 10);
	for (i = 0; i < 10; i++) {
		r600_store_value(cb, spi_vs_out_id[i]);
	}

	/* Certain attributes (position, psize, etc.) don't count as params.
	 * VS is required to export at least one param and r600_shader_from_tgsi()
	 * takes care of adding a dummy export.
	 */
	if (nparams < 1)
		nparams = 1;

	r600_store_context_reg(cb, R_0286C4_SPI_VS_OUT_CONFIG,
			       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
	r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
			       S_028860_NUM_GPRS(rshader->bc.ngpr) |
			       S_028860_DX10_CLAMP(1) |
			       S_028860_STACK_SIZE(rshader->bc.nstack));
	/* Window-space positions bypass the viewport transform entirely;
	 * otherwise enable the full viewport scale/offset path. */
	if (rshader->vs_position_window_space) {
		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
			S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
	} else {
		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
			S_028818_VTX_W0_FMT(1) |
			S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
			S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
			S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));

	}
	r600_store_context_reg(cb, R_02885C_SQ_PGM_START_VS,
			       shader->bo->gpu_address >> 8);
	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ.
	 */

	/* Cached here; emitted later with the rest of the clip state.
	 * Low/high nibble of cc_dist_mask select the two clip/cull
	 * distance vectors. */
	shader->pa_cl_vs_out_cntl =
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((rshader->cc_dist_mask & 0x0F) != 0) |
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->cc_dist_mask & 0xF0) != 0) |
		S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) |
		S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size) |
		S_02881C_USE_VTX_EDGE_FLAG(rshader->vs_out_edgeflag) |
		S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport) |
		S_02881C_USE_VTX_RENDER_TARGET_INDX(rshader->vs_out_layer);
}

/* Build the HS (tessellation control) state command buffer: program
 * resources and start address only. No NOP relocation comment here —
 * NOTE(review): confirm the relocation is emitted by the caller as for
 * the other stages. */
void evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	struct r600_command_buffer *cb = &shader->command_buffer;
	struct r600_shader *rshader = &shader->shader;

	r600_init_command_buffer(cb, 32);
	r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
			       S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
			       S_0288BC_DX10_CLAMP(1) |
			       S_0288BC_STACK_SIZE(rshader->bc.nstack));
	r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
			       shader->bo->gpu_address >> 8);
}

/* Build the LS (tessellation local/vertex) state command buffer: program
 * resources and start address only. */
void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	struct r600_command_buffer *cb = &shader->command_buffer;
	struct r600_shader *rshader = &shader->shader;

	r600_init_command_buffer(cb, 32);
	r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS,
			       S_0288D4_NUM_GPRS(rshader->bc.ngpr) |
			       S_0288D4_DX10_CLAMP(1) |
			       S_0288D4_STACK_SIZE(rshader->bc.nstack));
	r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS,
			       shader->bo->gpu_address >> 8);
}
/* Blend state used by the MSAA resolve pass (CB_RESOLVE mode, RT0 only). */
void *evergreen_create_resolve_blend(struct r600_context *rctx)
{
	struct pipe_blend_state blend;

	memset(&blend, 0, sizeof(blend));
	blend.independent_blend_enable = true;
	blend.rt[0].colormask = 0xf;
	return evergreen_create_blend_state_mode(&rctx->b.b, &blend, V_028808_CB_RESOLVE);
}

/* Blend state for decompress blits; FMASK decompression is used when the
 * screen supports compressed MSAA texturing, plain decompression otherwise. */
void *evergreen_create_decompress_blend(struct r600_context *rctx)
{
	struct pipe_blend_state blend;
	unsigned mode = rctx->screen->has_compressed_msaa_texturing ?
		V_028808_CB_FMASK_DECOMPRESS : V_028808_CB_DECOMPRESS;

	memset(&blend, 0, sizeof(blend));
	blend.independent_blend_enable = true;
	blend.rt[0].colormask = 0xf;
	return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
}

/* Blend state for expanding CB fast-clear data to the full surface. */
void *evergreen_create_fastclear_blend(struct r600_context *rctx)
{
	struct pipe_blend_state blend;
	unsigned mode = V_028808_CB_ELIMINATE_FAST_CLEAR;

	memset(&blend, 0, sizeof(blend));
	blend.independent_blend_enable = true;
	blend.rt[0].colormask = 0xf;
	return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
}

/* All-zero DSA state used when flushing the DB caches. */
void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
{
	struct pipe_depth_stencil_alpha_state dsa = {{0}};

	return rctx->b.b.create_depth_stencil_alpha_state(&rctx->b.b, &dsa);
}

/* Combine the current pixel shader's DB_SHADER_CONTROL with context state
 * (dual-source export, Z order policy, alpha-to-mask) and mark the DB misc
 * atom dirty if the resulting value changed. No-op without a bound PS. */
void evergreen_update_db_shader_control(struct r600_context * rctx)
{
	bool dual_export;
	unsigned db_shader_control;

	if (!rctx->ps_shader) {
		return;
	}

	/* 16-bpc dual export is only usable when the PS does not export depth. */
	dual_export = rctx->framebuffer.export_16bpc &&
		      !rctx->ps_shader->current->ps_depth_export;

	db_shader_control = rctx->ps_shader->current->db_shader_control |
			    S_02880C_DUAL_EXPORT_ENABLE(dual_export) |
			    S_02880C_DB_SOURCE_FORMAT(dual_export ? V_02880C_EXPORT_DB_TWO :
						      V_02880C_EXPORT_DB_FULL) |
			    S_02880C_ALPHA_TO_MASK_DISABLE(rctx->framebuffer.cb0_is_integer);

	/* When alpha test is enabled we can't trust the hw to make the proper
	 * decision on the order in which ztest should be run related to fragment
	 * shader execution.
	 *
	 * If alpha test is enabled perform early z rejection (RE_Z) but don't early
	 * write to the zbuffer. Write to zbuffer is delayed after fragment shader
	 * execution and thus after alpha test so if discarded by the alpha test
	 * the z value is not written.
	 * If ReZ is enabled, and the zfunc/zenable/zwrite values change you can
	 * get a hang unless you flush the DB in between. For now just use
	 * LATE_Z.
	 */
	if (rctx->alphatest_state.sx_alpha_test_control || rctx->ps_shader->info.writes_memory) {
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	} else {
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
	}

	if (db_shader_control != rctx->db_misc_state.db_shader_control) {
		rctx->db_misc_state.db_shader_control = db_shader_control;
		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
	}
}

/* Emit async-DMA tiled<->linear copy packets between two mip levels whose
 * tiling modes differ (caller asserts that). Splits the copy into chunks
 * bounded by EG_DMA_COPY_MAX_SIZE. pitch is the linear pitch in bytes,
 * bpp the bytes per block/pixel. */
static void evergreen_dma_copy_tile(struct r600_context *rctx,
				struct pipe_resource *dst,
				unsigned dst_level,
				unsigned dst_x,
				unsigned dst_y,
				unsigned dst_z,
				struct pipe_resource *src,
				unsigned src_level,
				unsigned src_x,
				unsigned src_y,
				unsigned src_z,
				unsigned copy_height,
				unsigned pitch,
				unsigned bpp)
{
	struct radeon_cmdbuf *cs = rctx->b.dma.cs;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct r600_texture *rdst = (struct r600_texture*)dst;
	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
	unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode;
	unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split, non_disp_tiling = 0;
	uint64_t base, addr;

	dst_mode = rdst->surface.u.legacy.level[dst_level].mode;
	src_mode = rsrc->surface.u.legacy.level[src_level].mode;
	/* One side must be tiled and the other linear for this path. */
	assert(dst_mode != src_mode);

	/* non_disp_tiling bit needs to be set for
	   depth, stencil, and fmask surfaces */
	if (util_format_has_depth(util_format_description(src->format)))
		non_disp_tiling = 1;

	y = 0;
	sub_cmd = EG_DMA_COPY_TILED;
	lbpp = util_logbase2(bpp);
	/* Tile pitch in units of 8-pixel tiles, minus one (hardware encoding). */
	pitch_tile_max = ((pitch / bpp) / 8) - 1;
	nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks);

	if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED) {
		/* T2L: tiled source -> linear destination. */
		array_mode = evergreen_array_mode(src_mode);
		slice_tile_max = (rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.u.legacy.level[src_level].nblk_y) / (8*8);
		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
		/* linear height must be the same as the slice tile max height, it's ok even
		 * if the linear destination/source have smaller heigh as the size of the
		 * dma packet will be using the copy_height which is always smaller or equal
		 * to the linear height
		 */
		height = u_minify(rsrc->resource.b.b.height0, src_level);
		detile = 1;
		x = src_x;
		y = src_y;
		z = src_z;
		/* base = tiled side, addr = linear side (byte-addressed). */
		base = rsrc->surface.u.legacy.level[src_level].offset;
		addr = rdst->surface.u.legacy.level[dst_level].offset;
		addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
		addr += dst_y * pitch + dst_x * bpp;
		bank_h = eg_bank_wh(rsrc->surface.u.legacy.bankh);
		bank_w = eg_bank_wh(rsrc->surface.u.legacy.bankw);
		mt_aspect = eg_macro_tile_aspect(rsrc->surface.u.legacy.mtilea);
		tile_split = eg_tile_split(rsrc->surface.u.legacy.tile_split);
		base += rsrc->resource.gpu_address;
		addr += rdst->resource.gpu_address;
	} else {
		/* L2T: linear source -> tiled destination. */
		array_mode = evergreen_array_mode(dst_mode);
		slice_tile_max = (rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.u.legacy.level[dst_level].nblk_y) / (8*8);
		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
		/* linear height must be the same as the slice tile max height, it's ok even
		 * if the linear destination/source have smaller heigh as the size of the
		 * dma packet will be using the copy_height which is always smaller or equal
		 * to the linear height
		 */
		height = u_minify(rdst->resource.b.b.height0, dst_level);
		detile = 0;
		x = dst_x;
		y = dst_y;
		z = dst_z;
		base = rdst->surface.u.legacy.level[dst_level].offset;
		addr = rsrc->surface.u.legacy.level[src_level].offset;
		addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z;
		addr += src_y * pitch + src_x * bpp;
		bank_h = eg_bank_wh(rdst->surface.u.legacy.bankh);
		bank_w = eg_bank_wh(rdst->surface.u.legacy.bankw);
		mt_aspect = eg_macro_tile_aspect(rdst->surface.u.legacy.mtilea);
		tile_split = eg_tile_split(rdst->surface.u.legacy.tile_split);
		base += rdst->resource.gpu_address;
		addr += rsrc->resource.gpu_address;
	}

	/* Total transfer size in dwords; round the packet count up. */
	size = (copy_height * pitch) / 4;
	ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);
	r600_need_dma_space(&rctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);

	for (i = 0; i < ncopy; i++) {
		cheight = copy_height;
		/* Clamp each chunk so its dword size fits a single DMA packet. */
		if (((cheight * pitch) / 4) > EG_DMA_COPY_MAX_SIZE) {
			cheight = (EG_DMA_COPY_MAX_SIZE * 4) / pitch;
		}
		size = (cheight * pitch) / 4;
		/* emit reloc before writing cs so that cs is always in consistent state */
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource,
					  RADEON_USAGE_READ, 0);
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource,
					  RADEON_USAGE_WRITE, 0);
		radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size));
		radeon_emit(cs, base >> 8);
		radeon_emit(cs, (detile << 31) | (array_mode << 27) |
				(lbpp << 24) | (bank_h << 21) |
				(bank_w << 18) | (mt_aspect << 16));
		radeon_emit(cs, (pitch_tile_max << 0) | ((height - 1) << 16));
		radeon_emit(cs, (slice_tile_max << 0));
		radeon_emit(cs, (x << 0) | (z << 18));
		radeon_emit(cs, (y << 0) | (tile_split << 21) | (nbanks << 25) | (non_disp_tiling << 28));
		radeon_emit(cs, addr & 0xfffffffc);
		radeon_emit(cs, (addr >> 32UL) & 0xff);
		/* Advance the linear address and tiled y coordinate per chunk. */
		copy_height -= cheight;
		addr += cheight * pitch;
		y += cheight;
	}
}

/* pipe_context::resource_copy_region backend for the async DMA engine.
 * Dispatches to buffer copy, same-mode blit, or tiled<->linear copy, and
 * falls back to the generic (gfx) copy path for anything DMA can't do. */
static void evergreen_dma_copy(struct pipe_context *ctx,
			       struct pipe_resource *dst,
			       unsigned dst_level,
			       unsigned dstx, unsigned dsty, unsigned dstz,
			       struct pipe_resource *src,
			       unsigned src_level,
			       const struct pipe_box *src_box)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct r600_texture *rdst = (struct r600_texture*)dst;
	unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
	unsigned src_w, dst_w;
	unsigned src_x, src_y;
	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;

	/* No DMA ring available: use the generic copy path. */
	if (rctx->b.dma.cs == NULL) {
		goto fallback;
	}

	if (rctx->cmd_buf_is_compute) {
		rctx->b.gfx.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
		rctx->cmd_buf_is_compute = false;
	}

	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
		evergreen_dma_copy_buffer(rctx, dst, src, dst_x, src_box->x, src_box->width);
		return;
	}

	if (src_box->depth > 1 ||
	    !r600_prepare_for_dma_blit(&rctx->b, rdst, dst_level, dstx, dsty,
					dstz, rsrc, src_level, src_box))
		goto fallback;

	/* Convert pixel coordinates to block coordinates (compressed formats). */
	src_x = util_format_get_nblocksx(src->format, src_box->x);
	dst_x = util_format_get_nblocksx(src->format, dst_x);
	src_y = util_format_get_nblocksy(src->format, src_box->y);
	dst_y = util_format_get_nblocksy(src->format, dst_y);

	bpp = rdst->surface.bpe;
	dst_pitch = rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.bpe;
	src_pitch = rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.bpe;
	src_w = u_minify(rsrc->resource.b.b.width0, src_level);
	dst_w = u_minify(rdst->resource.b.b.width0, dst_level);
	copy_height = src_box->height / rsrc->surface.blk_h;

	dst_mode = rdst->surface.u.legacy.level[dst_level].mode;
	src_mode = rsrc->surface.u.legacy.level[src_level].mode;

	if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
		/* FIXME evergreen can do partial blit */
		goto fallback;
	}
	/* the x test here are currently useless (because we don't support partial blit)
	 * but keep them around so we don't forget about those
	 */
	if (src_pitch % 8 || src_box->x % 8 || dst_x % 8 || src_box->y % 8 || dst_y % 8) {
		goto fallback;
	}

	/* 128 bpp surfaces require non_disp_tiling for both
	 * tiled and linear buffers on cayman. However, async
	 * DMA only supports it on the tiled side. As such
	 * the tile order is backwards after a L2T/T2L packet.
	 */
	if ((rctx->b.chip_class == CAYMAN) &&
	    (src_mode != dst_mode) &&
	    (util_format_get_blocksize(src->format) >= 16)) {
		goto fallback;
	}

	if (src_mode == dst_mode) {
		uint64_t dst_offset, src_offset;
		/* simple dma blit would do NOTE code here assume :
		 *   src_box.x/y == 0
		 *   dst_x/y == 0
		 *   dst_pitch == src_pitch
		 */
		src_offset= rsrc->surface.u.legacy.level[src_level].offset;
		src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z;
		src_offset += src_y * src_pitch + src_x * bpp;
		dst_offset = rdst->surface.u.legacy.level[dst_level].offset;
		dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
		dst_offset += dst_y * dst_pitch + dst_x * bpp;
		evergreen_dma_copy_buffer(rctx, dst, src, dst_offset, src_offset,
					  src_box->height * src_pitch);
	} else {
		evergreen_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
					src, src_level, src_x, src_y, src_box->z,
					copy_height, dst_pitch, bpp);
	}
	return;

fallback:
	r600_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
				  src, src_level, src_box);
}

/* pipe_context::set_tess_state: cache the default outer[4]/inner[2]
 * tessellation levels and flag the TCS default-levels constants dirty. */
static void evergreen_set_tess_state(struct pipe_context *ctx,
				     const float default_outer_level[4],
				     const float default_inner_level[2])
{
	struct r600_context *rctx = (struct r600_context *)ctx;

	memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
	memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
	rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = true;
}

/* Lazily allocate the resource's "immed" buffer (sized per shader engine)
 * and fill the image view's buffer-resource descriptor words for it. */
static void evergreen_setup_immed_buffer(struct r600_context *rctx,
					 struct r600_image_view *rview,
					 enum pipe_format pformat)
{
	struct r600_screen *rscreen = (struct r600_screen *)rctx->b.b.screen;
	/* 256 * 64 texels per SE — TODO confirm the derivation of this sizing. */
	uint32_t immed_size = rscreen->b.info.max_se * 256 * 64 * util_format_get_blocksize(pformat);
	struct eg_buf_res_params buf_params;
	bool skip_reloc = false;
	struct r600_resource *resource = (struct r600_resource *)rview->base.resource;
	if (!resource->immed_buffer) {
		eg_resource_alloc_immed(&rscreen->b, resource, immed_size);
	}

	memset(&buf_params, 0, sizeof(buf_params));
	buf_params.pipe_format = pformat;
	buf_params.size = resource->immed_buffer->b.b.width0;
buf_params.swizzle[0] = PIPE_SWIZZLE_X; 4004b8e80941Smrg buf_params.swizzle[1] = PIPE_SWIZZLE_Y; 4005b8e80941Smrg buf_params.swizzle[2] = PIPE_SWIZZLE_Z; 4006b8e80941Smrg buf_params.swizzle[3] = PIPE_SWIZZLE_W; 4007b8e80941Smrg buf_params.uncached = 1; 4008b8e80941Smrg evergreen_fill_buffer_resource_words(rctx, &resource->immed_buffer->b.b, 4009b8e80941Smrg &buf_params, &skip_reloc, 4010b8e80941Smrg rview->immed_resource_words); 4011b8e80941Smrg} 4012b8e80941Smrg 4013b8e80941Smrgstatic void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, 4014b8e80941Smrg unsigned start_slot, 4015b8e80941Smrg unsigned count, 4016b8e80941Smrg const struct pipe_shader_buffer *buffers) 4017b8e80941Smrg{ 4018b8e80941Smrg struct r600_context *rctx = (struct r600_context *)ctx; 4019b8e80941Smrg struct r600_atomic_buffer_state *astate; 4020b8e80941Smrg unsigned i, idx; 4021b8e80941Smrg 4022b8e80941Smrg astate = &rctx->atomic_buffer_state; 4023b8e80941Smrg 4024b8e80941Smrg /* we'd probably like to expand this to 8 later so put the logic in */ 4025b8e80941Smrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 4026b8e80941Smrg const struct pipe_shader_buffer *buf; 4027b8e80941Smrg struct pipe_shader_buffer *abuf; 4028b8e80941Smrg 4029b8e80941Smrg abuf = &astate->buffer[i]; 4030b8e80941Smrg 4031b8e80941Smrg if (!buffers || !buffers[idx].buffer) { 4032b8e80941Smrg pipe_resource_reference(&abuf->buffer, NULL); 4033b8e80941Smrg continue; 4034b8e80941Smrg } 4035b8e80941Smrg buf = &buffers[idx]; 4036b8e80941Smrg 4037b8e80941Smrg pipe_resource_reference(&abuf->buffer, buf->buffer); 4038b8e80941Smrg abuf->buffer_offset = buf->buffer_offset; 4039b8e80941Smrg abuf->buffer_size = buf->buffer_size; 4040b8e80941Smrg } 4041b8e80941Smrg} 4042b8e80941Smrg 4043b8e80941Smrgstatic void evergreen_set_shader_buffers(struct pipe_context *ctx, 4044b8e80941Smrg enum pipe_shader_type shader, unsigned start_slot, 4045b8e80941Smrg unsigned count, 4046b8e80941Smrg const struct 
pipe_shader_buffer *buffers, 4047b8e80941Smrg unsigned writable_bitmask) 4048b8e80941Smrg{ 4049b8e80941Smrg struct r600_context *rctx = (struct r600_context *)ctx; 4050b8e80941Smrg struct r600_image_state *istate = NULL; 4051b8e80941Smrg struct r600_image_view *rview; 4052b8e80941Smrg struct r600_tex_color_info color; 4053b8e80941Smrg struct eg_buf_res_params buf_params; 4054b8e80941Smrg struct r600_resource *resource; 4055b8e80941Smrg unsigned i, idx; 4056b8e80941Smrg unsigned old_mask; 4057b8e80941Smrg 4058b8e80941Smrg if (shader != PIPE_SHADER_FRAGMENT && 4059b8e80941Smrg shader != PIPE_SHADER_COMPUTE && count == 0) 4060b8e80941Smrg return; 4061b8e80941Smrg 4062b8e80941Smrg if (shader == PIPE_SHADER_FRAGMENT) 4063b8e80941Smrg istate = &rctx->fragment_buffers; 4064b8e80941Smrg else if (shader == PIPE_SHADER_COMPUTE) 4065b8e80941Smrg istate = &rctx->compute_buffers; 4066b8e80941Smrg 4067b8e80941Smrg old_mask = istate->enabled_mask; 4068b8e80941Smrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 4069b8e80941Smrg const struct pipe_shader_buffer *buf; 4070b8e80941Smrg unsigned res_type; 4071b8e80941Smrg 4072b8e80941Smrg rview = &istate->views[i]; 4073b8e80941Smrg 4074b8e80941Smrg if (!buffers || !buffers[idx].buffer) { 4075b8e80941Smrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL); 4076b8e80941Smrg istate->enabled_mask &= ~(1 << i); 4077b8e80941Smrg continue; 4078b8e80941Smrg } 4079b8e80941Smrg 4080b8e80941Smrg buf = &buffers[idx]; 4081b8e80941Smrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, buf->buffer); 4082b8e80941Smrg 4083b8e80941Smrg resource = (struct r600_resource *)rview->base.resource; 4084b8e80941Smrg 4085b8e80941Smrg evergreen_setup_immed_buffer(rctx, rview, PIPE_FORMAT_R32_UINT); 4086b8e80941Smrg 4087b8e80941Smrg color.offset = 0; 4088b8e80941Smrg color.view = 0; 4089b8e80941Smrg evergreen_set_color_surface_buffer(rctx, resource, 4090b8e80941Smrg PIPE_FORMAT_R32_UINT, 
4091b8e80941Smrg buf->buffer_offset, 4092b8e80941Smrg buf->buffer_offset + buf->buffer_size, 4093b8e80941Smrg &color); 4094b8e80941Smrg 4095b8e80941Smrg res_type = V_028C70_BUFFER; 4096b8e80941Smrg 4097b8e80941Smrg rview->cb_color_base = color.offset; 4098b8e80941Smrg rview->cb_color_dim = color.dim; 4099b8e80941Smrg rview->cb_color_info = color.info | 4100b8e80941Smrg S_028C70_RAT(1) | 4101b8e80941Smrg S_028C70_RESOURCE_TYPE(res_type); 4102b8e80941Smrg rview->cb_color_pitch = color.pitch; 4103b8e80941Smrg rview->cb_color_slice = color.slice; 4104b8e80941Smrg rview->cb_color_view = color.view; 4105b8e80941Smrg rview->cb_color_attrib = color.attrib; 4106b8e80941Smrg rview->cb_color_fmask = color.fmask; 4107b8e80941Smrg rview->cb_color_fmask_slice = color.fmask_slice; 4108b8e80941Smrg 4109b8e80941Smrg memset(&buf_params, 0, sizeof(buf_params)); 4110b8e80941Smrg buf_params.pipe_format = PIPE_FORMAT_R32_UINT; 4111b8e80941Smrg buf_params.offset = buf->buffer_offset; 4112b8e80941Smrg buf_params.size = buf->buffer_size; 4113b8e80941Smrg buf_params.swizzle[0] = PIPE_SWIZZLE_X; 4114b8e80941Smrg buf_params.swizzle[1] = PIPE_SWIZZLE_Y; 4115b8e80941Smrg buf_params.swizzle[2] = PIPE_SWIZZLE_Z; 4116b8e80941Smrg buf_params.swizzle[3] = PIPE_SWIZZLE_W; 4117b8e80941Smrg buf_params.force_swizzle = true; 4118b8e80941Smrg buf_params.uncached = 1; 4119b8e80941Smrg buf_params.size_in_bytes = true; 4120b8e80941Smrg evergreen_fill_buffer_resource_words(rctx, &resource->b.b, 4121b8e80941Smrg &buf_params, 4122b8e80941Smrg &rview->skip_mip_address_reloc, 4123b8e80941Smrg rview->resource_words); 4124b8e80941Smrg 4125b8e80941Smrg istate->enabled_mask |= (1 << i); 4126b8e80941Smrg } 4127b8e80941Smrg 4128b8e80941Smrg istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46; 4129b8e80941Smrg 4130b8e80941Smrg if (old_mask != istate->enabled_mask) 4131b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 4132b8e80941Smrg 4133b8e80941Smrg /* construct the target mask */ 
4134b8e80941Smrg if (rctx->cb_misc_state.buffer_rat_enabled_mask != istate->enabled_mask) { 4135b8e80941Smrg rctx->cb_misc_state.buffer_rat_enabled_mask = istate->enabled_mask; 4136b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 4137b8e80941Smrg } 4138b8e80941Smrg 4139b8e80941Smrg if (shader == PIPE_SHADER_FRAGMENT) 4140b8e80941Smrg r600_mark_atom_dirty(rctx, &istate->atom); 4141b8e80941Smrg} 4142b8e80941Smrg 4143b8e80941Smrgstatic void evergreen_set_shader_images(struct pipe_context *ctx, 4144b8e80941Smrg enum pipe_shader_type shader, unsigned start_slot, 4145b8e80941Smrg unsigned count, 4146b8e80941Smrg const struct pipe_image_view *images) 4147b8e80941Smrg{ 4148b8e80941Smrg struct r600_context *rctx = (struct r600_context *)ctx; 4149b8e80941Smrg unsigned i; 4150b8e80941Smrg struct r600_image_view *rview; 4151b8e80941Smrg struct pipe_resource *image; 4152b8e80941Smrg struct r600_resource *resource; 4153b8e80941Smrg struct r600_tex_color_info color; 4154b8e80941Smrg struct eg_buf_res_params buf_params; 4155b8e80941Smrg struct eg_tex_res_params tex_params; 4156b8e80941Smrg unsigned old_mask; 4157b8e80941Smrg struct r600_image_state *istate = NULL; 4158b8e80941Smrg int idx; 4159b8e80941Smrg if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE && count == 0) 4160b8e80941Smrg return; 4161b8e80941Smrg 4162b8e80941Smrg if (shader == PIPE_SHADER_FRAGMENT) 4163b8e80941Smrg istate = &rctx->fragment_images; 4164b8e80941Smrg else if (shader == PIPE_SHADER_COMPUTE) 4165b8e80941Smrg istate = &rctx->compute_images; 4166b8e80941Smrg 4167b8e80941Smrg assert (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE); 4168b8e80941Smrg 4169b8e80941Smrg old_mask = istate->enabled_mask; 4170b8e80941Smrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 4171b8e80941Smrg unsigned res_type; 4172b8e80941Smrg const struct pipe_image_view *iview; 4173b8e80941Smrg rview = &istate->views[i]; 4174b8e80941Smrg 4175b8e80941Smrg if 
(!images || !images[idx].resource) { 4176b8e80941Smrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL); 4177b8e80941Smrg istate->enabled_mask &= ~(1 << i); 4178b8e80941Smrg istate->compressed_colortex_mask &= ~(1 << i); 4179b8e80941Smrg istate->compressed_depthtex_mask &= ~(1 << i); 4180b8e80941Smrg continue; 4181b8e80941Smrg } 4182b8e80941Smrg 4183b8e80941Smrg iview = &images[idx]; 4184b8e80941Smrg image = iview->resource; 4185b8e80941Smrg resource = (struct r600_resource *)image; 4186b8e80941Smrg 4187b8e80941Smrg r600_context_add_resource_size(ctx, image); 4188b8e80941Smrg 4189b8e80941Smrg rview->base = *iview; 4190b8e80941Smrg rview->base.resource = NULL; 4191b8e80941Smrg pipe_resource_reference((struct pipe_resource **)&rview->base.resource, image); 4192b8e80941Smrg 4193b8e80941Smrg evergreen_setup_immed_buffer(rctx, rview, iview->format); 4194b8e80941Smrg 4195b8e80941Smrg bool is_buffer = image->target == PIPE_BUFFER; 4196b8e80941Smrg struct r600_texture *rtex = (struct r600_texture *)image; 4197b8e80941Smrg if (!is_buffer & rtex->db_compatible) 4198b8e80941Smrg istate->compressed_depthtex_mask |= 1 << i; 4199b8e80941Smrg else 4200b8e80941Smrg istate->compressed_depthtex_mask &= ~(1 << i); 4201b8e80941Smrg 4202b8e80941Smrg if (!is_buffer && rtex->cmask.size) 4203b8e80941Smrg istate->compressed_colortex_mask |= 1 << i; 4204b8e80941Smrg else 4205b8e80941Smrg istate->compressed_colortex_mask &= ~(1 << i); 4206b8e80941Smrg if (!is_buffer) { 4207b8e80941Smrg 4208b8e80941Smrg evergreen_set_color_surface_common(rctx, rtex, 4209b8e80941Smrg iview->u.tex.level, 4210b8e80941Smrg iview->u.tex.first_layer, 4211b8e80941Smrg iview->u.tex.last_layer, 4212b8e80941Smrg iview->format, 4213b8e80941Smrg &color); 4214b8e80941Smrg color.dim = S_028C78_WIDTH_MAX(u_minify(image->width0, iview->u.tex.level) - 1) | 4215b8e80941Smrg S_028C78_HEIGHT_MAX(u_minify(image->height0, iview->u.tex.level) - 1); 4216b8e80941Smrg } else { 4217b8e80941Smrg color.offset 
= 0; 4218b8e80941Smrg color.view = 0; 4219b8e80941Smrg evergreen_set_color_surface_buffer(rctx, resource, 4220b8e80941Smrg iview->format, 4221b8e80941Smrg iview->u.buf.offset, 4222b8e80941Smrg iview->u.buf.size, 4223b8e80941Smrg &color); 4224b8e80941Smrg } 4225b8e80941Smrg 4226b8e80941Smrg switch (image->target) { 4227b8e80941Smrg case PIPE_BUFFER: 4228b8e80941Smrg res_type = V_028C70_BUFFER; 4229b8e80941Smrg break; 4230b8e80941Smrg case PIPE_TEXTURE_1D: 4231b8e80941Smrg res_type = V_028C70_TEXTURE1D; 4232b8e80941Smrg break; 4233b8e80941Smrg case PIPE_TEXTURE_1D_ARRAY: 4234b8e80941Smrg res_type = V_028C70_TEXTURE1DARRAY; 4235b8e80941Smrg break; 4236b8e80941Smrg case PIPE_TEXTURE_2D: 4237b8e80941Smrg case PIPE_TEXTURE_RECT: 4238b8e80941Smrg res_type = V_028C70_TEXTURE2D; 4239b8e80941Smrg break; 4240b8e80941Smrg case PIPE_TEXTURE_3D: 4241b8e80941Smrg res_type = V_028C70_TEXTURE3D; 4242b8e80941Smrg break; 4243b8e80941Smrg case PIPE_TEXTURE_2D_ARRAY: 4244b8e80941Smrg case PIPE_TEXTURE_CUBE: 4245b8e80941Smrg case PIPE_TEXTURE_CUBE_ARRAY: 4246b8e80941Smrg res_type = V_028C70_TEXTURE2DARRAY; 4247b8e80941Smrg break; 4248b8e80941Smrg default: 4249b8e80941Smrg assert(0); 4250b8e80941Smrg res_type = 0; 4251b8e80941Smrg break; 4252b8e80941Smrg } 4253b8e80941Smrg 4254b8e80941Smrg rview->cb_color_base = color.offset; 4255b8e80941Smrg rview->cb_color_dim = color.dim; 4256b8e80941Smrg rview->cb_color_info = color.info | 4257b8e80941Smrg S_028C70_RAT(1) | 4258b8e80941Smrg S_028C70_RESOURCE_TYPE(res_type); 4259b8e80941Smrg rview->cb_color_pitch = color.pitch; 4260b8e80941Smrg rview->cb_color_slice = color.slice; 4261b8e80941Smrg rview->cb_color_view = color.view; 4262b8e80941Smrg rview->cb_color_attrib = color.attrib; 4263b8e80941Smrg rview->cb_color_fmask = color.fmask; 4264b8e80941Smrg rview->cb_color_fmask_slice = color.fmask_slice; 4265b8e80941Smrg 4266b8e80941Smrg if (image->target != PIPE_BUFFER) { 4267b8e80941Smrg memset(&tex_params, 0, sizeof(tex_params)); 4268b8e80941Smrg 
tex_params.pipe_format = iview->format; 4269b8e80941Smrg tex_params.force_level = 0; 4270b8e80941Smrg tex_params.width0 = image->width0; 4271b8e80941Smrg tex_params.height0 = image->height0; 4272b8e80941Smrg tex_params.first_level = iview->u.tex.level; 4273b8e80941Smrg tex_params.last_level = iview->u.tex.level; 4274b8e80941Smrg tex_params.first_layer = iview->u.tex.first_layer; 4275b8e80941Smrg tex_params.last_layer = iview->u.tex.last_layer; 4276b8e80941Smrg tex_params.target = image->target; 4277b8e80941Smrg tex_params.swizzle[0] = PIPE_SWIZZLE_X; 4278b8e80941Smrg tex_params.swizzle[1] = PIPE_SWIZZLE_Y; 4279b8e80941Smrg tex_params.swizzle[2] = PIPE_SWIZZLE_Z; 4280b8e80941Smrg tex_params.swizzle[3] = PIPE_SWIZZLE_W; 4281b8e80941Smrg evergreen_fill_tex_resource_words(rctx, &resource->b.b, &tex_params, 4282b8e80941Smrg &rview->skip_mip_address_reloc, 4283b8e80941Smrg rview->resource_words); 4284b8e80941Smrg 4285b8e80941Smrg } else { 4286b8e80941Smrg memset(&buf_params, 0, sizeof(buf_params)); 4287b8e80941Smrg buf_params.pipe_format = iview->format; 4288b8e80941Smrg buf_params.size = iview->u.buf.size; 4289b8e80941Smrg buf_params.offset = iview->u.buf.offset; 4290b8e80941Smrg buf_params.swizzle[0] = PIPE_SWIZZLE_X; 4291b8e80941Smrg buf_params.swizzle[1] = PIPE_SWIZZLE_Y; 4292b8e80941Smrg buf_params.swizzle[2] = PIPE_SWIZZLE_Z; 4293b8e80941Smrg buf_params.swizzle[3] = PIPE_SWIZZLE_W; 4294b8e80941Smrg evergreen_fill_buffer_resource_words(rctx, &resource->b.b, 4295b8e80941Smrg &buf_params, 4296b8e80941Smrg &rview->skip_mip_address_reloc, 4297b8e80941Smrg rview->resource_words); 4298b8e80941Smrg } 4299b8e80941Smrg istate->enabled_mask |= (1 << i); 4300b8e80941Smrg } 4301b8e80941Smrg 4302b8e80941Smrg istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46; 4303b8e80941Smrg istate->dirty_buffer_constants = TRUE; 4304b8e80941Smrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; 4305b8e80941Smrg rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | 
4306b8e80941Smrg R600_CONTEXT_FLUSH_AND_INV_CB_META; 4307b8e80941Smrg 4308b8e80941Smrg if (old_mask != istate->enabled_mask) 4309b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 4310b8e80941Smrg 4311b8e80941Smrg if (rctx->cb_misc_state.image_rat_enabled_mask != istate->enabled_mask) { 4312b8e80941Smrg rctx->cb_misc_state.image_rat_enabled_mask = istate->enabled_mask; 4313b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 4314b8e80941Smrg } 4315b8e80941Smrg 4316b8e80941Smrg if (shader == PIPE_SHADER_FRAGMENT) 4317b8e80941Smrg r600_mark_atom_dirty(rctx, &istate->atom); 4318b8e80941Smrg} 4319b8e80941Smrg 4320b8e80941Smrgstatic void evergreen_get_pipe_constant_buffer(struct r600_context *rctx, 4321b8e80941Smrg enum pipe_shader_type shader, uint slot, 4322b8e80941Smrg struct pipe_constant_buffer *cbuf) 4323b8e80941Smrg{ 4324b8e80941Smrg struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; 4325b8e80941Smrg struct pipe_constant_buffer *cb; 4326b8e80941Smrg cbuf->user_buffer = NULL; 4327b8e80941Smrg 4328b8e80941Smrg cb = &state->cb[slot]; 4329b8e80941Smrg 4330b8e80941Smrg cbuf->buffer_size = cb->buffer_size; 4331b8e80941Smrg pipe_resource_reference(&cbuf->buffer, cb->buffer); 4332b8e80941Smrg} 4333b8e80941Smrg 4334b8e80941Smrgstatic void evergreen_get_shader_buffers(struct r600_context *rctx, 4335b8e80941Smrg enum pipe_shader_type shader, 4336b8e80941Smrg uint start_slot, uint count, 4337b8e80941Smrg struct pipe_shader_buffer *sbuf) 4338b8e80941Smrg{ 4339b8e80941Smrg assert(shader == PIPE_SHADER_COMPUTE); 4340b8e80941Smrg int idx, i; 4341b8e80941Smrg struct r600_image_state *istate = &rctx->compute_buffers; 4342b8e80941Smrg struct r600_image_view *rview; 4343b8e80941Smrg 4344b8e80941Smrg for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { 4345b8e80941Smrg 4346b8e80941Smrg rview = &istate->views[i]; 4347b8e80941Smrg 4348b8e80941Smrg pipe_resource_reference(&sbuf[idx].buffer, rview->base.resource); 
4349b8e80941Smrg if (rview->base.resource) { 4350b8e80941Smrg uint64_t rview_va = ((struct r600_resource *)rview->base.resource)->gpu_address; 4351b8e80941Smrg 4352b8e80941Smrg uint64_t prog_va = rview->resource_words[0]; 4353b8e80941Smrg 4354b8e80941Smrg prog_va += ((uint64_t)G_030008_BASE_ADDRESS_HI(rview->resource_words[2])) << 32; 4355b8e80941Smrg prog_va -= rview_va; 4356b8e80941Smrg 4357b8e80941Smrg sbuf[idx].buffer_offset = prog_va & 0xffffffff; 4358b8e80941Smrg sbuf[idx].buffer_size = rview->resource_words[1] + 1;; 4359b8e80941Smrg } else { 4360b8e80941Smrg sbuf[idx].buffer_offset = 0; 4361b8e80941Smrg sbuf[idx].buffer_size = 0; 4362b8e80941Smrg } 4363b8e80941Smrg } 4364b8e80941Smrg} 4365b8e80941Smrg 4366b8e80941Smrgstatic void evergreen_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st) 4367b8e80941Smrg{ 4368b8e80941Smrg struct r600_context *rctx = (struct r600_context *)ctx; 4369b8e80941Smrg st->saved_compute = rctx->cs_shader_state.shader; 4370b8e80941Smrg 4371b8e80941Smrg /* save constant buffer 0 */ 4372b8e80941Smrg evergreen_get_pipe_constant_buffer(rctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 4373b8e80941Smrg /* save ssbo 0 */ 4374b8e80941Smrg evergreen_get_shader_buffers(rctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); 4375b8e80941Smrg} 4376b8e80941Smrg 4377b8e80941Smrg 4378848b8605Smrgvoid evergreen_init_state_functions(struct r600_context *rctx) 4379848b8605Smrg{ 4380b8e80941Smrg unsigned id = 1; 4381b8e80941Smrg unsigned i; 4382848b8605Smrg /* !!! 4383b8e80941Smrg * To avoid GPU lockup registers must be emitted in a specific order 4384848b8605Smrg * (no kidding ...). The order below is important and have been 4385b8e80941Smrg * partially inferred from analyzing fglrx command stream. 4386848b8605Smrg * 4387848b8605Smrg * Don't reorder atom without carefully checking the effect (GPU lockup 4388848b8605Smrg * or piglit regression). 4389848b8605Smrg * !!! 
4390848b8605Smrg */ 4391b8e80941Smrg if (rctx->b.chip_class == EVERGREEN) { 4392b8e80941Smrg r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11); 4393b8e80941Smrg rctx->config_state.dyn_gpr_enabled = true; 4394b8e80941Smrg } 4395848b8605Smrg r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0); 4396b8e80941Smrg r600_init_atom(rctx, &rctx->fragment_images.atom, id++, evergreen_emit_fragment_image_state, 0); 4397b8e80941Smrg r600_init_atom(rctx, &rctx->compute_images.atom, id++, evergreen_emit_compute_image_state, 0); 4398b8e80941Smrg r600_init_atom(rctx, &rctx->fragment_buffers.atom, id++, evergreen_emit_fragment_buffer_state, 0); 4399b8e80941Smrg r600_init_atom(rctx, &rctx->compute_buffers.atom, id++, evergreen_emit_compute_buffer_state, 0); 4400848b8605Smrg /* shader const */ 4401848b8605Smrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0); 4402848b8605Smrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0); 4403848b8605Smrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT].atom, id++, evergreen_emit_ps_constant_buffers, 0); 4404b8e80941Smrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_CTRL].atom, id++, evergreen_emit_tcs_constant_buffers, 0); 4405b8e80941Smrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_EVAL].atom, id++, evergreen_emit_tes_constant_buffers, 0); 4406848b8605Smrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom, id++, evergreen_emit_cs_constant_buffers, 0); 4407848b8605Smrg /* shader program */ 4408848b8605Smrg r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0); 4409848b8605Smrg /* sampler */ 4410848b8605Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0); 4411848b8605Smrg 
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0); 4412b8e80941Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].states.atom, id++, evergreen_emit_tcs_sampler_states, 0); 4413b8e80941Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].states.atom, id++, evergreen_emit_tes_sampler_states, 0); 4414848b8605Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0); 4415b8e80941Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0); 4416848b8605Smrg /* resources */ 4417848b8605Smrg r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0); 4418848b8605Smrg r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0); 4419848b8605Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0); 4420848b8605Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0); 4421b8e80941Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].views.atom, id++, evergreen_emit_tcs_sampler_views, 0); 4422b8e80941Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views.atom, id++, evergreen_emit_tes_sampler_views, 0); 4423848b8605Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0); 4424b8e80941Smrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0); 4425848b8605Smrg 4426b8e80941Smrg r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10); 4427848b8605Smrg 4428848b8605Smrg if (rctx->b.chip_class == EVERGREEN) { 4429848b8605Smrg r600_init_atom(rctx, &rctx->sample_mask.atom, id++, evergreen_emit_sample_mask, 3); 
4430848b8605Smrg } else { 4431848b8605Smrg r600_init_atom(rctx, &rctx->sample_mask.atom, id++, cayman_emit_sample_mask, 4); 4432848b8605Smrg } 4433848b8605Smrg rctx->sample_mask.sample_mask = ~0; 4434848b8605Smrg 4435848b8605Smrg r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6); 4436848b8605Smrg r600_init_atom(rctx, &rctx->blend_color.atom, id++, r600_emit_blend_color, 6); 4437848b8605Smrg r600_init_atom(rctx, &rctx->blend_state.atom, id++, r600_emit_cso_state, 0); 4438848b8605Smrg r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 4); 4439b8e80941Smrg r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 9); 4440848b8605Smrg r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26); 4441848b8605Smrg r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 10); 4442848b8605Smrg r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14); 4443848b8605Smrg r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0); 4444b8e80941Smrg r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 9); 4445848b8605Smrg r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0); 4446b8e80941Smrg r600_add_atom(rctx, &rctx->b.scissors.atom, id++); 4447b8e80941Smrg r600_add_atom(rctx, &rctx->b.viewports.atom, id++); 4448848b8605Smrg r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); 4449848b8605Smrg r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); 4450b8e80941Smrg r600_add_atom(rctx, &rctx->b.render_cond_atom, id++); 4451b8e80941Smrg r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++); 4452b8e80941Smrg r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++); 4453b8e80941Smrg for (i = 0; i < EG_NUM_HW_STAGES; i++) 4454b8e80941Smrg r600_init_atom(rctx, 
&rctx->hw_shader_stages[i].atom, id++, r600_emit_shader, 0); 4455b8e80941Smrg r600_init_atom(rctx, &rctx->shader_stages.atom, id++, evergreen_emit_shader_stages, 15); 4456848b8605Smrg r600_init_atom(rctx, &rctx->gs_rings.atom, id++, evergreen_emit_gs_rings, 26); 4457848b8605Smrg 4458848b8605Smrg rctx->b.b.create_blend_state = evergreen_create_blend_state; 4459848b8605Smrg rctx->b.b.create_depth_stencil_alpha_state = evergreen_create_dsa_state; 4460848b8605Smrg rctx->b.b.create_rasterizer_state = evergreen_create_rs_state; 4461848b8605Smrg rctx->b.b.create_sampler_state = evergreen_create_sampler_state; 4462848b8605Smrg rctx->b.b.create_sampler_view = evergreen_create_sampler_view; 4463848b8605Smrg rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state; 4464848b8605Smrg rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple; 4465b8e80941Smrg rctx->b.b.set_min_samples = evergreen_set_min_samples; 4466b8e80941Smrg rctx->b.b.set_tess_state = evergreen_set_tess_state; 4467b8e80941Smrg rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers; 4468b8e80941Smrg rctx->b.b.set_shader_images = evergreen_set_shader_images; 4469b8e80941Smrg rctx->b.b.set_shader_buffers = evergreen_set_shader_buffers; 4470848b8605Smrg if (rctx->b.chip_class == EVERGREEN) 4471848b8605Smrg rctx->b.b.get_sample_position = evergreen_get_sample_position; 4472848b8605Smrg else 4473848b8605Smrg rctx->b.b.get_sample_position = cayman_get_sample_position; 4474848b8605Smrg rctx->b.dma_copy = evergreen_dma_copy; 4475b8e80941Smrg rctx->b.save_qbo_state = evergreen_save_qbo_state; 4476848b8605Smrg 4477848b8605Smrg evergreen_init_compute_state_functions(rctx); 4478848b8605Smrg} 4479b8e80941Smrg 4480b8e80941Smrg/** 4481b8e80941Smrg * This calculates the LDS size for tessellation shaders (VS, TCS, TES). 4482b8e80941Smrg * 4483b8e80941Smrg * The information about LDS and other non-compile-time parameters is then 4484b8e80941Smrg * written to the const buffer. 

 * const buffer contains -
 * uint32_t input_patch_size
 * uint32_t input_vertex_size
 * uint32_t num_tcs_input_cp
 * uint32_t num_tcs_output_cp;
 * uint32_t output_patch_size
 * uint32_t output_vertex_size
 * uint32_t output_patch0_offset
 * uint32_t perpatch_output_offset
 * and the same constbuf is bound to LS/HS/VS(ES).
 */
void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches)
{
	struct pipe_constant_buffer constbuf = {0};
	/* If there is no TCS, the TES selector stands in for the fixed-function HS. */
	struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader;
	struct r600_pipe_shader_selector *ls = rctx->vs_shader;
	unsigned num_tcs_input_cp = info->vertices_per_patch;
	unsigned num_tcs_outputs;
	unsigned num_tcs_output_cp;
	unsigned num_tcs_patch_outputs;
	unsigned num_tcs_inputs;
	unsigned input_vertex_size, output_vertex_size;
	unsigned input_patch_size, pervertex_output_patch_size, output_patch_size;
	unsigned output_patch0_offset, perpatch_output_offset, lds_size;
	uint32_t values[8];
	unsigned num_waves;
	unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
	unsigned wave_divisor = (16 * num_pipes);

	*num_patches = 1;

	/* No tessellation: release the LDS allocation and unbind the
	 * LDS-info constant buffer from all three stages. */
	if (!rctx->tes_shader) {
		rctx->lds_alloc = 0;
		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
					      R600_LDS_INFO_CONST_BUFFER, NULL);
		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
					      R600_LDS_INFO_CONST_BUFFER, NULL);
		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
					      R600_LDS_INFO_CONST_BUFFER, NULL);
		return;
	}

	/* Skip the recomputation if the same LS/TCS pair and patch size are
	 * still bound and an allocation already exists. */
	if (rctx->lds_alloc != 0 &&
	    rctx->last_ls == ls &&
	    rctx->last_num_tcs_input_cp == num_tcs_input_cp &&
	    rctx->last_tcs == tcs)
		return;

	num_tcs_inputs = util_last_bit64(ls->lds_outputs_written_mask);

	if (rctx->tcs_shader) {
		num_tcs_outputs = util_last_bit64(tcs->lds_outputs_written_mask);
		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
		num_tcs_patch_outputs = util_last_bit64(tcs->lds_patch_outputs_written_mask);
	} else {
		/* Fixed-function TCS passes the LS outputs through. */
		num_tcs_outputs = num_tcs_inputs;
		num_tcs_output_cp = num_tcs_input_cp;
		num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
	}

	/* size in bytes */
	input_vertex_size = num_tcs_inputs * 16;
	output_vertex_size = num_tcs_outputs * 16;

	input_patch_size = num_tcs_input_cp * input_vertex_size;

	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;

	/* Output patches follow the input patches in LDS; with no TCS the
	 * outputs are read in place at offset 0. */
	output_patch0_offset = rctx->tcs_shader ? input_patch_size * *num_patches : 0;
	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;

	lds_size = output_patch0_offset + output_patch_size * *num_patches;

	values[0] = input_patch_size;
	values[1] = input_vertex_size;
	values[2] = num_tcs_input_cp;
	values[3] = num_tcs_output_cp;

	values[4] = output_patch_size;
	values[5] = output_vertex_size;
	values[6] = output_patch0_offset;
	values[7] = perpatch_output_offset;

	/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
	   LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
	num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor);

	/* SQ_LDS_ALLOC packs the size with the HS wave count at bit 14. */
	rctx->lds_alloc = (lds_size | (num_waves << 14));

	rctx->last_ls = ls;
	rctx->last_tcs = tcs;
	rctx->last_num_tcs_input_cp = num_tcs_input_cp;

	constbuf.user_buffer = values;
	constbuf.buffer_size = 8 * 4;

	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
	pipe_resource_reference(&constbuf.buffer, NULL);
}

/* Build the VGT_LS_HS_CONFIG value for the current draw; returns 0 when
 * tessellation is disabled. */
uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
				    const struct pipe_draw_info *info,
				    unsigned num_patches)
{
	unsigned num_output_cp;

	if (!rctx->tes_shader)
		return 0;

	num_output_cp = rctx->tcs_shader ?
		rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
		info->vertices_per_patch;

	return S_028B58_NUM_PATCHES(num_patches) |
		S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
		S_028B58_HS_NUM_OUTPUT_CP(num_output_cp);
}

/* Emit VGT_LS_HS_CONFIG. */
void evergreen_set_ls_hs_config(struct r600_context *rctx,
				struct radeon_cmdbuf *cs,
				uint32_t ls_hs_config)
{
	radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
}

/* Emit SQ_LDS_ALLOC. */
void evergreen_set_lds_alloc(struct r600_context *rctx,
			     struct radeon_cmdbuf *cs,
			     uint32_t lds_alloc)
{
	radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
}

/* on evergreen if you are running tessellation you need to disable dynamic
   GPRs to workaround a hardware bug.*/
bool evergreen_adjust_gprs(struct r600_context *rctx)
{
	unsigned num_gprs[EG_NUM_HW_STAGES];
	unsigned def_gprs[EG_NUM_HW_STAGES];
	unsigned cur_gprs[EG_NUM_HW_STAGES];
	unsigned new_gprs[EG_NUM_HW_STAGES];
	unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
	unsigned max_gprs;
	unsigned i;
	unsigned total_gprs;
	unsigned tmp[3];
	bool rework = false, set_default = false, set_dirty = false;
	max_gprs = 0;
	for (i = 0; i < EG_NUM_HW_STAGES; i++)
{ 4640b8e80941Smrg def_gprs[i] = rctx->default_gprs[i]; 4641b8e80941Smrg max_gprs += def_gprs[i]; 4642b8e80941Smrg } 4643b8e80941Smrg max_gprs += def_num_clause_temp_gprs * 2; 4644b8e80941Smrg 4645b8e80941Smrg /* if we have no TESS and dyn gpr is enabled then do nothing. */ 4646b8e80941Smrg if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader) { 4647b8e80941Smrg if (rctx->config_state.dyn_gpr_enabled) 4648b8e80941Smrg return true; 4649b8e80941Smrg 4650b8e80941Smrg /* transition back to dyn gpr enabled state */ 4651b8e80941Smrg rctx->config_state.dyn_gpr_enabled = true; 4652b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->config_state.atom); 4653b8e80941Smrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 4654b8e80941Smrg return true; 4655b8e80941Smrg } 4656b8e80941Smrg 4657b8e80941Smrg 4658b8e80941Smrg /* gather required shader gprs */ 4659b8e80941Smrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 4660b8e80941Smrg if (rctx->hw_shader_stages[i].shader) 4661b8e80941Smrg num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr; 4662b8e80941Smrg else 4663b8e80941Smrg num_gprs[i] = 0; 4664b8e80941Smrg } 4665b8e80941Smrg 4666b8e80941Smrg cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); 4667b8e80941Smrg cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); 4668b8e80941Smrg cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); 4669b8e80941Smrg cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); 4670b8e80941Smrg cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3); 4671b8e80941Smrg cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3); 4672b8e80941Smrg 4673b8e80941Smrg total_gprs = 0; 4674b8e80941Smrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 4675b8e80941Smrg new_gprs[i] = num_gprs[i]; 4676b8e80941Smrg total_gprs += num_gprs[i]; 
4677b8e80941Smrg } 4678b8e80941Smrg 4679b8e80941Smrg if (total_gprs > (max_gprs - (2 * def_num_clause_temp_gprs))) 4680b8e80941Smrg return false; 4681b8e80941Smrg 4682b8e80941Smrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 4683b8e80941Smrg if (new_gprs[i] > cur_gprs[i]) { 4684b8e80941Smrg rework = true; 4685b8e80941Smrg break; 4686b8e80941Smrg } 4687b8e80941Smrg } 4688b8e80941Smrg 4689b8e80941Smrg if (rctx->config_state.dyn_gpr_enabled) { 4690b8e80941Smrg set_dirty = true; 4691b8e80941Smrg rctx->config_state.dyn_gpr_enabled = false; 4692b8e80941Smrg } 4693b8e80941Smrg 4694b8e80941Smrg if (rework) { 4695b8e80941Smrg set_default = true; 4696b8e80941Smrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 4697b8e80941Smrg if (new_gprs[i] > def_gprs[i]) 4698b8e80941Smrg set_default = false; 4699b8e80941Smrg } 4700b8e80941Smrg 4701b8e80941Smrg if (set_default) { 4702b8e80941Smrg for (i = 0; i < EG_NUM_HW_STAGES; i++) { 4703b8e80941Smrg new_gprs[i] = def_gprs[i]; 4704b8e80941Smrg } 4705b8e80941Smrg } else { 4706b8e80941Smrg unsigned ps_value = max_gprs; 4707b8e80941Smrg 4708b8e80941Smrg ps_value -= (def_num_clause_temp_gprs * 2); 4709b8e80941Smrg for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++) 4710b8e80941Smrg ps_value -= new_gprs[i]; 4711b8e80941Smrg 4712b8e80941Smrg new_gprs[R600_HW_STAGE_PS] = ps_value; 4713b8e80941Smrg } 4714b8e80941Smrg 4715b8e80941Smrg tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) | 4716b8e80941Smrg S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) | 4717b8e80941Smrg S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); 4718b8e80941Smrg 4719b8e80941Smrg tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) | 4720b8e80941Smrg S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]); 4721b8e80941Smrg 4722b8e80941Smrg tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) | 4723b8e80941Smrg S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]); 4724b8e80941Smrg 4725b8e80941Smrg if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] || 4726b8e80941Smrg 
rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] || 4727b8e80941Smrg rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) { 4728b8e80941Smrg rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0]; 4729b8e80941Smrg rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1]; 4730b8e80941Smrg rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2]; 4731b8e80941Smrg set_dirty = true; 4732b8e80941Smrg } 4733b8e80941Smrg } 4734b8e80941Smrg 4735b8e80941Smrg 4736b8e80941Smrg if (set_dirty) { 4737b8e80941Smrg r600_mark_atom_dirty(rctx, &rctx->config_state.atom); 4738b8e80941Smrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 4739b8e80941Smrg } 4740b8e80941Smrg return true; 4741b8e80941Smrg} 4742b8e80941Smrg 4743b8e80941Smrg#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) 4744b8e80941Smrg 4745b8e80941Smrgvoid eg_trace_emit(struct r600_context *rctx) 4746b8e80941Smrg{ 4747b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 4748b8e80941Smrg unsigned reloc; 4749b8e80941Smrg 4750b8e80941Smrg if (rctx->b.chip_class < EVERGREEN) 4751b8e80941Smrg return; 4752b8e80941Smrg 4753b8e80941Smrg /* This must be done after r600_need_cs_space. 
*/ 4754b8e80941Smrg reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 4755b8e80941Smrg (struct r600_resource*)rctx->trace_buf, RADEON_USAGE_WRITE, 4756b8e80941Smrg RADEON_PRIO_CP_DMA); 4757b8e80941Smrg 4758b8e80941Smrg rctx->trace_id++; 4759b8e80941Smrg radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rctx->trace_buf, 4760b8e80941Smrg RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); 4761b8e80941Smrg radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); 4762b8e80941Smrg radeon_emit(cs, rctx->trace_buf->gpu_address); 4763b8e80941Smrg radeon_emit(cs, rctx->trace_buf->gpu_address >> 32 | MEM_WRITE_32_BITS | MEM_WRITE_CONFIRM); 4764b8e80941Smrg radeon_emit(cs, rctx->trace_id); 4765b8e80941Smrg radeon_emit(cs, 0); 4766b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 4767b8e80941Smrg radeon_emit(cs, reloc); 4768b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 4769b8e80941Smrg radeon_emit(cs, AC_ENCODE_TRACE_POINT(rctx->trace_id)); 4770b8e80941Smrg} 4771b8e80941Smrg 4772b8e80941Smrgstatic void evergreen_emit_set_append_cnt(struct r600_context *rctx, 4773b8e80941Smrg struct r600_shader_atomic *atomic, 4774b8e80941Smrg struct r600_resource *resource, 4775b8e80941Smrg uint32_t pkt_flags) 4776b8e80941Smrg{ 4777b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 4778b8e80941Smrg unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 4779b8e80941Smrg resource, 4780b8e80941Smrg RADEON_USAGE_READ, 4781b8e80941Smrg RADEON_PRIO_SHADER_RW_BUFFER); 4782b8e80941Smrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 4783b8e80941Smrg uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0; 4784b8e80941Smrg 4785b8e80941Smrg uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4 - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; 4786b8e80941Smrg 4787b8e80941Smrg radeon_emit(cs, PKT3(PKT3_SET_APPEND_CNT, 2, 0) | pkt_flags); 4788b8e80941Smrg radeon_emit(cs, (reg_val << 16) | 0x3); 4789b8e80941Smrg radeon_emit(cs, dst_offset & 0xfffffffc); 4790b8e80941Smrg radeon_emit(cs, 
(dst_offset >> 32) & 0xff); 4791b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 4792b8e80941Smrg radeon_emit(cs, reloc); 4793b8e80941Smrg} 4794b8e80941Smrg 4795b8e80941Smrgstatic void evergreen_emit_event_write_eos(struct r600_context *rctx, 4796b8e80941Smrg struct r600_shader_atomic *atomic, 4797b8e80941Smrg struct r600_resource *resource, 4798b8e80941Smrg uint32_t pkt_flags) 4799b8e80941Smrg{ 4800b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 4801b8e80941Smrg uint32_t event = EVENT_TYPE_PS_DONE; 4802b8e80941Smrg uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0; 4803b8e80941Smrg uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 4804b8e80941Smrg resource, 4805b8e80941Smrg RADEON_USAGE_WRITE, 4806b8e80941Smrg RADEON_PRIO_SHADER_RW_BUFFER); 4807b8e80941Smrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 4808b8e80941Smrg uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4) >> 2; 4809b8e80941Smrg 4810b8e80941Smrg if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) 4811b8e80941Smrg event = EVENT_TYPE_CS_DONE; 4812b8e80941Smrg 4813b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); 4814b8e80941Smrg radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); 4815b8e80941Smrg radeon_emit(cs, (dst_offset) & 0xffffffff); 4816b8e80941Smrg radeon_emit(cs, (0 << 29) | ((dst_offset >> 32) & 0xff)); 4817b8e80941Smrg radeon_emit(cs, reg_val); 4818b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 4819b8e80941Smrg radeon_emit(cs, reloc); 4820b8e80941Smrg} 4821b8e80941Smrg 4822b8e80941Smrgstatic void cayman_emit_event_write_eos(struct r600_context *rctx, 4823b8e80941Smrg struct r600_shader_atomic *atomic, 4824b8e80941Smrg struct r600_resource *resource, 4825b8e80941Smrg uint32_t pkt_flags) 4826b8e80941Smrg{ 4827b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 4828b8e80941Smrg uint32_t event = EVENT_TYPE_PS_DONE; 4829b8e80941Smrg uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 4830b8e80941Smrg 
resource, 4831b8e80941Smrg RADEON_USAGE_WRITE, 4832b8e80941Smrg RADEON_PRIO_SHADER_RW_BUFFER); 4833b8e80941Smrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 4834b8e80941Smrg 4835b8e80941Smrg if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) 4836b8e80941Smrg event = EVENT_TYPE_CS_DONE; 4837b8e80941Smrg 4838b8e80941Smrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); 4839b8e80941Smrg radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); 4840b8e80941Smrg radeon_emit(cs, (dst_offset) & 0xffffffff); 4841b8e80941Smrg radeon_emit(cs, (1 << 29) | ((dst_offset >> 32) & 0xff)); 4842b8e80941Smrg radeon_emit(cs, (atomic->hw_idx) | (1 << 16)); 4843b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 4844b8e80941Smrg radeon_emit(cs, reloc); 4845b8e80941Smrg} 4846b8e80941Smrg 4847b8e80941Smrg/* writes count from a buffer into GDS */ 4848b8e80941Smrgstatic void cayman_write_count_to_gds(struct r600_context *rctx, 4849b8e80941Smrg struct r600_shader_atomic *atomic, 4850b8e80941Smrg struct r600_resource *resource, 4851b8e80941Smrg uint32_t pkt_flags) 4852b8e80941Smrg{ 4853b8e80941Smrg struct radeon_cmdbuf *cs = rctx->b.gfx.cs; 4854b8e80941Smrg unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 4855b8e80941Smrg resource, 4856b8e80941Smrg RADEON_USAGE_READ, 4857b8e80941Smrg RADEON_PRIO_SHADER_RW_BUFFER); 4858b8e80941Smrg uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); 4859b8e80941Smrg 4860b8e80941Smrg radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags); 4861b8e80941Smrg radeon_emit(cs, dst_offset & 0xffffffff); 4862b8e80941Smrg radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_DST_SEL(1) | ((dst_offset >> 32) & 0xff));// GDS 4863b8e80941Smrg radeon_emit(cs, atomic->hw_idx * 4); 4864b8e80941Smrg radeon_emit(cs, 0); 4865b8e80941Smrg radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | 4); 4866b8e80941Smrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 4867b8e80941Smrg radeon_emit(cs, reloc); 4868b8e80941Smrg} 4869b8e80941Smrg 
4870b8e80941Smrgvoid evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, 4871b8e80941Smrg struct r600_pipe_shader *cs_shader, 4872b8e80941Smrg struct r600_shader_atomic *combined_atomics, 4873b8e80941Smrg uint8_t *atomic_used_mask_p) 4874b8e80941Smrg{ 4875b8e80941Smrg uint8_t atomic_used_mask = 0; 4876b8e80941Smrg int i, j, k; 4877b8e80941Smrg bool is_compute = cs_shader ? true : false; 4878b8e80941Smrg 4879b8e80941Smrg for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) { 4880b8e80941Smrg uint8_t num_atomic_stage; 4881b8e80941Smrg struct r600_pipe_shader *pshader; 4882b8e80941Smrg 4883b8e80941Smrg if (is_compute) 4884b8e80941Smrg pshader = cs_shader; 4885b8e80941Smrg else 4886b8e80941Smrg pshader = rctx->hw_shader_stages[i].shader; 4887b8e80941Smrg if (!pshader) 4888b8e80941Smrg continue; 4889b8e80941Smrg 4890b8e80941Smrg num_atomic_stage = pshader->shader.nhwatomic_ranges; 4891b8e80941Smrg if (!num_atomic_stage) 4892b8e80941Smrg continue; 4893b8e80941Smrg 4894b8e80941Smrg for (j = 0; j < num_atomic_stage; j++) { 4895b8e80941Smrg struct r600_shader_atomic *atomic = &pshader->shader.atomics[j]; 4896b8e80941Smrg int natomics = atomic->end - atomic->start + 1; 4897b8e80941Smrg 4898b8e80941Smrg for (k = 0; k < natomics; k++) { 4899b8e80941Smrg /* seen this in a previous stage */ 4900b8e80941Smrg if (atomic_used_mask & (1u << (atomic->hw_idx + k))) 4901b8e80941Smrg continue; 4902b8e80941Smrg 4903b8e80941Smrg combined_atomics[atomic->hw_idx + k].hw_idx = atomic->hw_idx + k; 4904b8e80941Smrg combined_atomics[atomic->hw_idx + k].buffer_id = atomic->buffer_id; 4905b8e80941Smrg combined_atomics[atomic->hw_idx + k].start = atomic->start + k; 4906b8e80941Smrg combined_atomics[atomic->hw_idx + k].end = combined_atomics[atomic->hw_idx + k].start + 1; 4907b8e80941Smrg atomic_used_mask |= (1u << (atomic->hw_idx + k)); 4908b8e80941Smrg } 4909b8e80941Smrg } 4910b8e80941Smrg } 4911b8e80941Smrg *atomic_used_mask_p = atomic_used_mask; 4912b8e80941Smrg} 
/* Before a draw/dispatch: load every used GDS append counter with the
 * current value from its atomic buffer.  Cayman uses a CP_DMA into GDS;
 * evergreen uses the SET_APPEND_CNT packet. */
void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
					bool is_compute,
					struct r600_shader_atomic *combined_atomics,
					uint8_t atomic_used_mask)
{
	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
	unsigned pkt_flags = 0;
	uint32_t mask;

	if (is_compute)
		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;

	mask = atomic_used_mask;
	if (!mask)
		return;

	/* Iterate the set bits of the usage mask (one per counter slot). */
	while (mask) {
		unsigned atomic_index = u_bit_scan(&mask);
		struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
		struct r600_resource *resource = r600_resource(astate->buffer[atomic->buffer_id].buffer);
		assert(resource);

		if (rctx->b.chip_class == CAYMAN)
			cayman_write_count_to_gds(rctx, atomic, resource, pkt_flags);
		else
			evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
	}
}

/* After a draw/dispatch: write every used GDS append counter back to its
 * atomic buffer via EVENT_WRITE_EOS, then emit a fence EOS write plus a
 * WAIT_REG_MEM on the fence value so later commands observe the stores. */
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
				       bool is_compute,
				       struct r600_shader_atomic *combined_atomics,
				       uint8_t *atomic_used_mask_p)
{
	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
	uint32_t pkt_flags = 0;
	uint32_t event = EVENT_TYPE_PS_DONE;
	uint32_t mask;
	uint64_t dst_offset;
	unsigned reloc;

	if (is_compute)
		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;

	mask = *atomic_used_mask_p;
	if (!mask)
		return;

	/* One EOS write-back per used counter slot. */
	while (mask) {
		unsigned atomic_index = u_bit_scan(&mask);
		struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
		struct r600_resource *resource = r600_resource(astate->buffer[atomic->buffer_id].buffer);
		assert(resource);

		if (rctx->b.chip_class == CAYMAN)
			cayman_emit_event_write_eos(rctx, atomic, resource, pkt_flags);
		else
			evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_flags);
	}

	if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)
		event = EVENT_TYPE_CS_DONE;

	/* Fence: EOS writes the new append_fence_id into the fence buffer
	 * once the shader stage has drained (command 2 in bits 31:29)... */
	++rctx->append_fence_id;
	reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
					  r600_resource(rctx->append_fence),
					  RADEON_USAGE_READWRITE,
					  RADEON_PRIO_SHADER_RW_BUFFER);
	dst_offset = r600_resource(rctx->append_fence)->gpu_address;
	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags);
	radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
	radeon_emit(cs, dst_offset & 0xffffffff);
	radeon_emit(cs, (2 << 29) | ((dst_offset >> 32) & 0xff));
	radeon_emit(cs, rctx->append_fence_id);
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, reloc);

	/* ...then stall the CP until memory >= fence id (GEQUAL compare). */
	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0) | pkt_flags);
	radeon_emit(cs, WAIT_REG_MEM_GEQUAL | WAIT_REG_MEM_MEMORY | (1 << 8));
	radeon_emit(cs, dst_offset & 0xffffffff);
	radeon_emit(cs, ((dst_offset >> 32) & 0xff));
	radeon_emit(cs, rctx->append_fence_id);
	radeon_emit(cs, 0xffffffff);	/* compare mask */
	radeon_emit(cs, 0xa);		/* poll interval — NOTE(review): confirm units */
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, reloc);
}