13464ebd5Sriastradh/* 23464ebd5Sriastradh * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 33464ebd5Sriastradh * 43464ebd5Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a 53464ebd5Sriastradh * copy of this software and associated documentation files (the "Software"), 63464ebd5Sriastradh * to deal in the Software without restriction, including without limitation 73464ebd5Sriastradh * on the rights to use, copy, modify, merge, publish, distribute, sub 83464ebd5Sriastradh * license, and/or sell copies of the Software, and to permit persons to whom 93464ebd5Sriastradh * the Software is furnished to do so, subject to the following conditions: 103464ebd5Sriastradh * 113464ebd5Sriastradh * The above copyright notice and this permission notice (including the next 123464ebd5Sriastradh * paragraph) shall be included in all copies or substantial portions of the 133464ebd5Sriastradh * Software. 143464ebd5Sriastradh * 153464ebd5Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 163464ebd5Sriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 173464ebd5Sriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 183464ebd5Sriastradh * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 193464ebd5Sriastradh * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 203464ebd5Sriastradh * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 213464ebd5Sriastradh * USE OR OTHER DEALINGS IN THE SOFTWARE. 223464ebd5Sriastradh */ 23af69d88dSmrg#include "r600_formats.h" 243464ebd5Sriastradh#include "r600_shader.h" 25af69d88dSmrg#include "r600d.h" 26af69d88dSmrg 27af69d88dSmrg#include "pipe/p_shader_tokens.h" 28af69d88dSmrg#include "util/u_pack_color.h" 29af69d88dSmrg#include "util/u_memory.h" 30af69d88dSmrg#include "util/u_framebuffer.h" 31af69d88dSmrg#include "util/u_dual_blend.h" 32af69d88dSmrg 33af69d88dSmrgstatic uint32_t r600_translate_blend_function(int blend_func) 34af69d88dSmrg{ 35af69d88dSmrg switch (blend_func) { 36af69d88dSmrg case PIPE_BLEND_ADD: 37af69d88dSmrg return V_028804_COMB_DST_PLUS_SRC; 38af69d88dSmrg case PIPE_BLEND_SUBTRACT: 39af69d88dSmrg return V_028804_COMB_SRC_MINUS_DST; 40af69d88dSmrg case PIPE_BLEND_REVERSE_SUBTRACT: 41af69d88dSmrg return V_028804_COMB_DST_MINUS_SRC; 42af69d88dSmrg case PIPE_BLEND_MIN: 43af69d88dSmrg return V_028804_COMB_MIN_DST_SRC; 44af69d88dSmrg case PIPE_BLEND_MAX: 45af69d88dSmrg return V_028804_COMB_MAX_DST_SRC; 46af69d88dSmrg default: 47af69d88dSmrg R600_ERR("Unknown blend function %d\n", blend_func); 48af69d88dSmrg assert(0); 49af69d88dSmrg break; 50af69d88dSmrg } 51af69d88dSmrg return 0; 52af69d88dSmrg} 53af69d88dSmrg 54af69d88dSmrgstatic uint32_t r600_translate_blend_factor(int blend_fact) 55af69d88dSmrg{ 56af69d88dSmrg switch (blend_fact) { 57af69d88dSmrg case PIPE_BLENDFACTOR_ONE: 58af69d88dSmrg return V_028804_BLEND_ONE; 59af69d88dSmrg case PIPE_BLENDFACTOR_SRC_COLOR: 60af69d88dSmrg return V_028804_BLEND_SRC_COLOR; 61af69d88dSmrg case PIPE_BLENDFACTOR_SRC_ALPHA: 62af69d88dSmrg return V_028804_BLEND_SRC_ALPHA; 63af69d88dSmrg case PIPE_BLENDFACTOR_DST_ALPHA: 64af69d88dSmrg return V_028804_BLEND_DST_ALPHA; 65af69d88dSmrg case PIPE_BLENDFACTOR_DST_COLOR: 66af69d88dSmrg return V_028804_BLEND_DST_COLOR; 67af69d88dSmrg case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 68af69d88dSmrg return V_028804_BLEND_SRC_ALPHA_SATURATE; 69af69d88dSmrg case PIPE_BLENDFACTOR_CONST_COLOR: 70af69d88dSmrg return V_028804_BLEND_CONST_COLOR; 71af69d88dSmrg case PIPE_BLENDFACTOR_CONST_ALPHA: 72af69d88dSmrg return V_028804_BLEND_CONST_ALPHA; 73af69d88dSmrg case PIPE_BLENDFACTOR_ZERO: 74af69d88dSmrg return V_028804_BLEND_ZERO; 75af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC_COLOR: 76af69d88dSmrg return V_028804_BLEND_ONE_MINUS_SRC_COLOR; 77af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 78af69d88dSmrg return V_028804_BLEND_ONE_MINUS_SRC_ALPHA; 79af69d88dSmrg case PIPE_BLENDFACTOR_INV_DST_ALPHA: 80af69d88dSmrg return V_028804_BLEND_ONE_MINUS_DST_ALPHA; 81af69d88dSmrg case PIPE_BLENDFACTOR_INV_DST_COLOR: 82af69d88dSmrg return V_028804_BLEND_ONE_MINUS_DST_COLOR; 83af69d88dSmrg case PIPE_BLENDFACTOR_INV_CONST_COLOR: 84af69d88dSmrg return V_028804_BLEND_ONE_MINUS_CONST_COLOR; 85af69d88dSmrg case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 86af69d88dSmrg return V_028804_BLEND_ONE_MINUS_CONST_ALPHA; 87af69d88dSmrg case PIPE_BLENDFACTOR_SRC1_COLOR: 88af69d88dSmrg return V_028804_BLEND_SRC1_COLOR; 89af69d88dSmrg case PIPE_BLENDFACTOR_SRC1_ALPHA: 90af69d88dSmrg return V_028804_BLEND_SRC1_ALPHA; 91af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 92af69d88dSmrg return V_028804_BLEND_INV_SRC1_COLOR; 93af69d88dSmrg case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 94af69d88dSmrg return V_028804_BLEND_INV_SRC1_ALPHA; 95af69d88dSmrg default: 96af69d88dSmrg R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 97af69d88dSmrg assert(0); 98af69d88dSmrg break; 99af69d88dSmrg } 100af69d88dSmrg return 0; 101af69d88dSmrg} 1023464ebd5Sriastradh 103af69d88dSmrgstatic unsigned r600_tex_dim(unsigned dim, unsigned nr_samples) 1043464ebd5Sriastradh{ 105af69d88dSmrg switch (dim) { 106af69d88dSmrg default: 107af69d88dSmrg case PIPE_TEXTURE_1D: 108af69d88dSmrg return V_038000_SQ_TEX_DIM_1D; 109af69d88dSmrg case PIPE_TEXTURE_1D_ARRAY: 110af69d88dSmrg return V_038000_SQ_TEX_DIM_1D_ARRAY; 111af69d88dSmrg case PIPE_TEXTURE_2D: 112af69d88dSmrg case PIPE_TEXTURE_RECT: 113af69d88dSmrg return nr_samples > 1 ? V_038000_SQ_TEX_DIM_2D_MSAA : 114af69d88dSmrg V_038000_SQ_TEX_DIM_2D; 115af69d88dSmrg case PIPE_TEXTURE_2D_ARRAY: 116af69d88dSmrg return nr_samples > 1 ? V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA : 117af69d88dSmrg V_038000_SQ_TEX_DIM_2D_ARRAY; 118af69d88dSmrg case PIPE_TEXTURE_3D: 119af69d88dSmrg return V_038000_SQ_TEX_DIM_3D; 120af69d88dSmrg case PIPE_TEXTURE_CUBE: 121af69d88dSmrg case PIPE_TEXTURE_CUBE_ARRAY: 122af69d88dSmrg return V_038000_SQ_TEX_DIM_CUBEMAP; 123af69d88dSmrg } 124af69d88dSmrg} 1253464ebd5Sriastradh 126af69d88dSmrgstatic uint32_t r600_translate_dbformat(enum pipe_format format) 127af69d88dSmrg{ 128af69d88dSmrg switch (format) { 129af69d88dSmrg case PIPE_FORMAT_Z16_UNORM: 130af69d88dSmrg return V_028010_DEPTH_16; 131af69d88dSmrg case PIPE_FORMAT_Z24X8_UNORM: 132af69d88dSmrg return V_028010_DEPTH_X8_24; 133af69d88dSmrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 134af69d88dSmrg return V_028010_DEPTH_8_24; 135af69d88dSmrg case PIPE_FORMAT_Z32_FLOAT: 136af69d88dSmrg return V_028010_DEPTH_32_FLOAT; 137af69d88dSmrg case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 138af69d88dSmrg return V_028010_DEPTH_X24_8_32_FLOAT; 139af69d88dSmrg default: 140af69d88dSmrg return ~0U; 141af69d88dSmrg } 142af69d88dSmrg} 1433464ebd5Sriastradh 144af69d88dSmrgstatic bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 145af69d88dSmrg{ 14601e04c3fSmrg return r600_translate_texformat(screen, format, NULL, NULL, NULL, 14701e04c3fSmrg FALSE) != ~0U; 148af69d88dSmrg} 149af69d88dSmrg 150af69d88dSmrgstatic bool r600_is_colorbuffer_format_supported(enum chip_class chip, enum pipe_format format) 151af69d88dSmrg{ 15201e04c3fSmrg return r600_translate_colorformat(chip, format, FALSE) != ~0U && 15301e04c3fSmrg r600_translate_colorswap(format, FALSE) != ~0U; 154af69d88dSmrg} 155af69d88dSmrg 156af69d88dSmrgstatic bool r600_is_zs_format_supported(enum pipe_format format) 157af69d88dSmrg{ 158af69d88dSmrg return r600_translate_dbformat(format) != ~0U; 159af69d88dSmrg} 160af69d88dSmrg 1617ec681f3Smrgbool r600_is_format_supported(struct pipe_screen *screen, 1627ec681f3Smrg enum pipe_format format, 1637ec681f3Smrg enum pipe_texture_target target, 1647ec681f3Smrg unsigned sample_count, 1657ec681f3Smrg unsigned storage_sample_count, 1667ec681f3Smrg unsigned usage) 167af69d88dSmrg{ 168af69d88dSmrg struct r600_screen *rscreen = (struct r600_screen*)screen; 169af69d88dSmrg unsigned retval = 0; 170af69d88dSmrg 171af69d88dSmrg if (target >= PIPE_MAX_TEXTURE_TYPES) { 172af69d88dSmrg R600_ERR("r600: unsupported texture type %d\n", target); 1737ec681f3Smrg return false; 174af69d88dSmrg } 175af69d88dSmrg 17601e04c3fSmrg if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) 17701e04c3fSmrg return false; 178af69d88dSmrg 179af69d88dSmrg if (sample_count > 1) { 180af69d88dSmrg if (!rscreen->has_msaa) 1817ec681f3Smrg return false; 182af69d88dSmrg 183af69d88dSmrg /* R11G11B10 is broken on R6xx. */ 184af69d88dSmrg if (rscreen->b.chip_class == R600 && 185af69d88dSmrg format == PIPE_FORMAT_R11G11B10_FLOAT) 1867ec681f3Smrg return false; 187af69d88dSmrg 188af69d88dSmrg /* MSAA integer colorbuffers hang. */ 189af69d88dSmrg if (util_format_is_pure_integer(format) && 190af69d88dSmrg !util_format_is_depth_or_stencil(format)) 1917ec681f3Smrg return false; 192af69d88dSmrg 193af69d88dSmrg switch (sample_count) { 194af69d88dSmrg case 2: 195af69d88dSmrg case 4: 196af69d88dSmrg case 8: 1973464ebd5Sriastradh break; 1983464ebd5Sriastradh default: 1997ec681f3Smrg return false; 2003464ebd5Sriastradh } 201af69d88dSmrg } 2023464ebd5Sriastradh 203af69d88dSmrg if (usage & PIPE_BIND_SAMPLER_VIEW) { 204af69d88dSmrg if (target == PIPE_BUFFER) { 205af69d88dSmrg if (r600_is_vertex_format_supported(format)) 206af69d88dSmrg retval |= PIPE_BIND_SAMPLER_VIEW; 207af69d88dSmrg } else { 208af69d88dSmrg if (r600_is_sampler_format_supported(screen, format)) 209af69d88dSmrg retval |= PIPE_BIND_SAMPLER_VIEW; 210af69d88dSmrg } 211af69d88dSmrg } 212af69d88dSmrg 213af69d88dSmrg if ((usage & (PIPE_BIND_RENDER_TARGET | 214af69d88dSmrg PIPE_BIND_DISPLAY_TARGET | 215af69d88dSmrg PIPE_BIND_SCANOUT | 21601e04c3fSmrg PIPE_BIND_SHARED | 21701e04c3fSmrg PIPE_BIND_BLENDABLE)) && 218af69d88dSmrg r600_is_colorbuffer_format_supported(rscreen->b.chip_class, format)) { 219af69d88dSmrg retval |= usage & 220af69d88dSmrg (PIPE_BIND_RENDER_TARGET | 221af69d88dSmrg PIPE_BIND_DISPLAY_TARGET | 222af69d88dSmrg PIPE_BIND_SCANOUT | 223af69d88dSmrg PIPE_BIND_SHARED); 22401e04c3fSmrg if (!util_format_is_pure_integer(format) && 22501e04c3fSmrg !util_format_is_depth_or_stencil(format)) 22601e04c3fSmrg retval |= usage & PIPE_BIND_BLENDABLE; 227af69d88dSmrg } 228af69d88dSmrg 229af69d88dSmrg if ((usage & PIPE_BIND_DEPTH_STENCIL) && 230af69d88dSmrg r600_is_zs_format_supported(format)) { 231af69d88dSmrg retval |= PIPE_BIND_DEPTH_STENCIL; 232af69d88dSmrg } 233af69d88dSmrg 234af69d88dSmrg if ((usage & PIPE_BIND_VERTEX_BUFFER) && 235af69d88dSmrg r600_is_vertex_format_supported(format)) { 236af69d88dSmrg retval |= PIPE_BIND_VERTEX_BUFFER; 237af69d88dSmrg } 238af69d88dSmrg 2397ec681f3Smrg if (usage & PIPE_BIND_INDEX_BUFFER && 2407ec681f3Smrg r600_is_index_format_supported(format)) { 2417ec681f3Smrg retval |= PIPE_BIND_INDEX_BUFFER; 2427ec681f3Smrg } 2437ec681f3Smrg 24401e04c3fSmrg if ((usage & PIPE_BIND_LINEAR) && 24501e04c3fSmrg !util_format_is_compressed(format) && 24601e04c3fSmrg !(usage & PIPE_BIND_DEPTH_STENCIL)) 24701e04c3fSmrg retval |= PIPE_BIND_LINEAR; 248af69d88dSmrg 249af69d88dSmrg return retval == usage; 2503464ebd5Sriastradh} 2513464ebd5Sriastradh 252af69d88dSmrgstatic void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) 2533464ebd5Sriastradh{ 2547ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 255af69d88dSmrg struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; 256af69d88dSmrg float offset_units = state->offset_units; 257af69d88dSmrg float offset_scale = state->offset_scale; 25801e04c3fSmrg uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 25901e04c3fSmrg 26001e04c3fSmrg if (!state->offset_units_unscaled) { 26101e04c3fSmrg switch (state->zs_format) { 26201e04c3fSmrg case PIPE_FORMAT_Z24X8_UNORM: 26301e04c3fSmrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 26401e04c3fSmrg offset_units *= 2.0f; 26501e04c3fSmrg pa_su_poly_offset_db_fmt_cntl = 26601e04c3fSmrg S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24); 26701e04c3fSmrg break; 26801e04c3fSmrg case PIPE_FORMAT_Z16_UNORM: 26901e04c3fSmrg offset_units *= 4.0f; 27001e04c3fSmrg pa_su_poly_offset_db_fmt_cntl = 27101e04c3fSmrg S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16); 27201e04c3fSmrg break; 27301e04c3fSmrg default: 27401e04c3fSmrg pa_su_poly_offset_db_fmt_cntl = 27501e04c3fSmrg S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) | 27601e04c3fSmrg S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 27701e04c3fSmrg } 278af69d88dSmrg } 2793464ebd5Sriastradh 28001e04c3fSmrg radeon_set_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 4); 281af69d88dSmrg radeon_emit(cs, fui(offset_scale)); 282af69d88dSmrg radeon_emit(cs, fui(offset_units)); 283af69d88dSmrg radeon_emit(cs, fui(offset_scale)); 284af69d88dSmrg radeon_emit(cs, fui(offset_units)); 28501e04c3fSmrg 28601e04c3fSmrg radeon_set_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 28701e04c3fSmrg pa_su_poly_offset_db_fmt_cntl); 288af69d88dSmrg} 289af69d88dSmrg 290af69d88dSmrgstatic uint32_t r600_get_blend_control(const struct pipe_blend_state *state, unsigned i) 291af69d88dSmrg{ 292af69d88dSmrg int j = state->independent_blend_enable ? i : 0; 293af69d88dSmrg 294af69d88dSmrg unsigned eqRGB = state->rt[j].rgb_func; 295af69d88dSmrg unsigned srcRGB = state->rt[j].rgb_src_factor; 296af69d88dSmrg unsigned dstRGB = state->rt[j].rgb_dst_factor; 297af69d88dSmrg 298af69d88dSmrg unsigned eqA = state->rt[j].alpha_func; 299af69d88dSmrg unsigned srcA = state->rt[j].alpha_src_factor; 300af69d88dSmrg unsigned dstA = state->rt[j].alpha_dst_factor; 301af69d88dSmrg uint32_t bc = 0; 302af69d88dSmrg 303af69d88dSmrg if (!state->rt[j].blend_enable) 304af69d88dSmrg return 0; 305af69d88dSmrg 306af69d88dSmrg bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); 307af69d88dSmrg bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); 308af69d88dSmrg bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); 309af69d88dSmrg 310af69d88dSmrg if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 311af69d88dSmrg bc |= S_028804_SEPARATE_ALPHA_BLEND(1); 312af69d88dSmrg bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); 313af69d88dSmrg bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); 314af69d88dSmrg bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); 3153464ebd5Sriastradh } 316af69d88dSmrg return bc; 317af69d88dSmrg} 318af69d88dSmrg 319af69d88dSmrgstatic void *r600_create_blend_state_mode(struct pipe_context *ctx, 320af69d88dSmrg const struct pipe_blend_state *state, 321af69d88dSmrg int mode) 322af69d88dSmrg{ 323af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 324af69d88dSmrg uint32_t color_control = 0, target_mask = 0; 325af69d88dSmrg struct r600_blend_state *blend = CALLOC_STRUCT(r600_blend_state); 3263464ebd5Sriastradh 327af69d88dSmrg if (!blend) { 328af69d88dSmrg return NULL; 329af69d88dSmrg } 3303464ebd5Sriastradh 331af69d88dSmrg r600_init_command_buffer(&blend->buffer, 20); 332af69d88dSmrg r600_init_command_buffer(&blend->buffer_no_blend, 20); 3333464ebd5Sriastradh 3343464ebd5Sriastradh /* R600 does not support per-MRT blends */ 335af69d88dSmrg if (rctx->b.family > CHIP_R600) 3363464ebd5Sriastradh color_control |= S_028808_PER_MRT_BLEND(1); 337af69d88dSmrg 3383464ebd5Sriastradh if (state->logicop_enable) { 3393464ebd5Sriastradh color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); 3403464ebd5Sriastradh } else { 3413464ebd5Sriastradh color_control |= (0xcc << 16); 3423464ebd5Sriastradh } 3433464ebd5Sriastradh /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 3443464ebd5Sriastradh if (state->independent_blend_enable) { 3453464ebd5Sriastradh for (int i = 0; i < 8; i++) { 3463464ebd5Sriastradh if (state->rt[i].blend_enable) { 3473464ebd5Sriastradh color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); 3483464ebd5Sriastradh } 3493464ebd5Sriastradh target_mask |= (state->rt[i].colormask << (4 * i)); 3503464ebd5Sriastradh } 3513464ebd5Sriastradh } else { 3523464ebd5Sriastradh for (int i = 0; i < 8; i++) { 3533464ebd5Sriastradh if (state->rt[0].blend_enable) { 3543464ebd5Sriastradh color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); 3553464ebd5Sriastradh } 3563464ebd5Sriastradh target_mask |= (state->rt[0].colormask << (4 * i)); 3573464ebd5Sriastradh } 3583464ebd5Sriastradh } 3593464ebd5Sriastradh 360af69d88dSmrg if (target_mask) 361af69d88dSmrg color_control |= S_028808_SPECIAL_OP(mode); 362af69d88dSmrg else 363af69d88dSmrg color_control |= S_028808_SPECIAL_OP(V_028808_DISABLE); 3643464ebd5Sriastradh 365af69d88dSmrg /* only MRT0 has dual src blend */ 366af69d88dSmrg blend->dual_src_blend = util_blend_state_is_dual(state, 0); 367af69d88dSmrg blend->cb_target_mask = target_mask; 368af69d88dSmrg blend->cb_color_control = color_control; 369af69d88dSmrg blend->cb_color_control_no_blend = color_control & C_028808_TARGET_BLEND_ENABLE; 370af69d88dSmrg blend->alpha_to_one = state->alpha_to_one; 371af69d88dSmrg 372af69d88dSmrg r600_store_context_reg(&blend->buffer, R_028D44_DB_ALPHA_TO_MASK, 373af69d88dSmrg S_028D44_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 374af69d88dSmrg S_028D44_ALPHA_TO_MASK_OFFSET0(2) | 375af69d88dSmrg S_028D44_ALPHA_TO_MASK_OFFSET1(2) | 376af69d88dSmrg S_028D44_ALPHA_TO_MASK_OFFSET2(2) | 377af69d88dSmrg S_028D44_ALPHA_TO_MASK_OFFSET3(2)); 378af69d88dSmrg 379af69d88dSmrg /* Copy over the registers set so far into buffer_no_blend. */ 380af69d88dSmrg memcpy(blend->buffer_no_blend.buf, blend->buffer.buf, blend->buffer.num_dw * 4); 381af69d88dSmrg blend->buffer_no_blend.num_dw = blend->buffer.num_dw; 382af69d88dSmrg 383af69d88dSmrg /* Only add blend registers if blending is enabled. */ 384af69d88dSmrg if (!G_028808_TARGET_BLEND_ENABLE(color_control)) { 385af69d88dSmrg return blend; 386af69d88dSmrg } 3873464ebd5Sriastradh 388af69d88dSmrg /* The first R600 does not support per-MRT blends */ 389af69d88dSmrg r600_store_context_reg(&blend->buffer, R_028804_CB_BLEND_CONTROL, 390af69d88dSmrg r600_get_blend_control(state, 0)); 3913464ebd5Sriastradh 392af69d88dSmrg if (rctx->b.family > CHIP_R600) { 393af69d88dSmrg r600_store_context_reg_seq(&blend->buffer, R_028780_CB_BLEND0_CONTROL, 8); 394af69d88dSmrg for (int i = 0; i < 8; i++) { 395af69d88dSmrg r600_store_value(&blend->buffer, r600_get_blend_control(state, i)); 3963464ebd5Sriastradh } 3973464ebd5Sriastradh } 398af69d88dSmrg return blend; 399af69d88dSmrg} 400af69d88dSmrg 401af69d88dSmrgstatic void *r600_create_blend_state(struct pipe_context *ctx, 402af69d88dSmrg const struct pipe_blend_state *state) 403af69d88dSmrg{ 404af69d88dSmrg return r600_create_blend_state_mode(ctx, state, V_028808_SPECIAL_NORMAL); 4053464ebd5Sriastradh} 4063464ebd5Sriastradh 4073464ebd5Sriastradhstatic void *r600_create_dsa_state(struct pipe_context *ctx, 4083464ebd5Sriastradh const struct pipe_depth_stencil_alpha_state *state) 4093464ebd5Sriastradh{ 410af69d88dSmrg unsigned db_depth_control, alpha_test_control, alpha_ref; 411af69d88dSmrg struct r600_dsa_state *dsa = CALLOC_STRUCT(r600_dsa_state); 4123464ebd5Sriastradh 41301e04c3fSmrg if (!dsa) { 4143464ebd5Sriastradh return NULL; 4153464ebd5Sriastradh } 4163464ebd5Sriastradh 417af69d88dSmrg r600_init_command_buffer(&dsa->buffer, 3); 418af69d88dSmrg 419af69d88dSmrg dsa->valuemask[0] = state->stencil[0].valuemask; 420af69d88dSmrg dsa->valuemask[1] = state->stencil[1].valuemask; 421af69d88dSmrg dsa->writemask[0] = state->stencil[0].writemask; 422af69d88dSmrg dsa->writemask[1] = state->stencil[1].writemask; 4237ec681f3Smrg dsa->zwritemask = state->depth_writemask; 4243464ebd5Sriastradh 4257ec681f3Smrg db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) | 4267ec681f3Smrg S_028800_Z_WRITE_ENABLE(state->depth_writemask) | 4277ec681f3Smrg S_028800_ZFUNC(state->depth_func); 4283464ebd5Sriastradh 4293464ebd5Sriastradh /* stencil */ 4303464ebd5Sriastradh if (state->stencil[0].enabled) { 4313464ebd5Sriastradh db_depth_control |= S_028800_STENCIL_ENABLE(1); 432af69d88dSmrg db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); /* translates straight */ 4333464ebd5Sriastradh db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); 4343464ebd5Sriastradh db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); 4353464ebd5Sriastradh db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); 4363464ebd5Sriastradh 4373464ebd5Sriastradh if (state->stencil[1].enabled) { 4383464ebd5Sriastradh db_depth_control |= S_028800_BACKFACE_ENABLE(1); 439af69d88dSmrg db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); /* translates straight */ 4403464ebd5Sriastradh db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); 4413464ebd5Sriastradh db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); 4423464ebd5Sriastradh db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); 4433464ebd5Sriastradh } 4443464ebd5Sriastradh } 4453464ebd5Sriastradh 4463464ebd5Sriastradh /* alpha */ 4473464ebd5Sriastradh alpha_test_control = 0; 4483464ebd5Sriastradh alpha_ref = 0; 4497ec681f3Smrg if (state->alpha_enabled) { 4507ec681f3Smrg alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func); 4513464ebd5Sriastradh alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); 4527ec681f3Smrg alpha_ref = fui(state->alpha_ref_value); 4533464ebd5Sriastradh } 454af69d88dSmrg dsa->sx_alpha_test_control = alpha_test_control & 0xff; 4553464ebd5Sriastradh dsa->alpha_ref = alpha_ref; 4563464ebd5Sriastradh 457af69d88dSmrg r600_store_context_reg(&dsa->buffer, R_028800_DB_DEPTH_CONTROL, db_depth_control); 458af69d88dSmrg return dsa; 4593464ebd5Sriastradh} 4603464ebd5Sriastradh 4613464ebd5Sriastradhstatic void *r600_create_rs_state(struct pipe_context *ctx, 462af69d88dSmrg const struct pipe_rasterizer_state *state) 4633464ebd5Sriastradh{ 464af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 465af69d88dSmrg unsigned tmp, sc_mode_cntl, spi_interp; 466af69d88dSmrg float psize_min, psize_max; 467af69d88dSmrg struct r600_rasterizer_state *rs = CALLOC_STRUCT(r600_rasterizer_state); 4683464ebd5Sriastradh 46901e04c3fSmrg if (!rs) { 4703464ebd5Sriastradh return NULL; 4713464ebd5Sriastradh } 4723464ebd5Sriastradh 473af69d88dSmrg r600_init_command_buffer(&rs->buffer, 30); 474af69d88dSmrg 47501e04c3fSmrg rs->scissor_enable = state->scissor; 47601e04c3fSmrg rs->clip_halfz = state->clip_halfz; 4773464ebd5Sriastradh rs->flatshade = state->flatshade; 4783464ebd5Sriastradh rs->sprite_coord_enable = state->sprite_coord_enable; 47901e04c3fSmrg rs->rasterizer_discard = state->rasterizer_discard; 480af69d88dSmrg rs->two_side = state->light_twoside; 481af69d88dSmrg rs->clip_plane_enable = state->clip_plane_enable; 482af69d88dSmrg rs->pa_sc_line_stipple = state->line_stipple_enable ? 483af69d88dSmrg S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 484af69d88dSmrg S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 485af69d88dSmrg rs->pa_cl_clip_cntl = 48601e04c3fSmrg S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 48701e04c3fSmrg S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | 48801e04c3fSmrg S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | 489af69d88dSmrg S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 490af69d88dSmrg if (rctx->b.chip_class == R700) { 491af69d88dSmrg rs->pa_cl_clip_cntl |= 492af69d88dSmrg S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard); 493af69d88dSmrg } 494af69d88dSmrg rs->multisample_enable = state->multisample; 4953464ebd5Sriastradh 4963464ebd5Sriastradh /* offset */ 4973464ebd5Sriastradh rs->offset_units = state->offset_units; 49801e04c3fSmrg rs->offset_scale = state->offset_scale * 16.0f; 499af69d88dSmrg rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri; 50001e04c3fSmrg rs->offset_units_unscaled = state->offset_units_unscaled; 501af69d88dSmrg 502af69d88dSmrg if (state->point_size_per_vertex) { 503af69d88dSmrg psize_min = util_get_min_point_size(state); 504af69d88dSmrg psize_max = 8192; 505af69d88dSmrg } else { 506af69d88dSmrg /* Force the point size to be as if the vertex output was disabled. */ 507af69d88dSmrg psize_min = state->point_size; 508af69d88dSmrg psize_max = state->point_size; 509af69d88dSmrg } 5103464ebd5Sriastradh 511af69d88dSmrg sc_mode_cntl = S_028A4C_MSAA_ENABLE(state->multisample) | 512af69d88dSmrg S_028A4C_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 51301e04c3fSmrg S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 51401e04c3fSmrg S_028A4C_PS_ITER_SAMPLE(state->multisample && rctx->ps_iter_samples > 1); 51501e04c3fSmrg if (rctx->b.family == CHIP_RV770) { 51601e04c3fSmrg /* workaround possible rendering corruption on RV770 with hyperz together with sample shading */ 51701e04c3fSmrg sc_mode_cntl |= S_028A4C_TILE_COVER_DISABLE(state->multisample && rctx->ps_iter_samples > 1); 51801e04c3fSmrg } 519af69d88dSmrg if (rctx->b.chip_class >= R700) { 520af69d88dSmrg sc_mode_cntl |= S_028A4C_FORCE_EOV_REZ_ENABLE(1) | 521af69d88dSmrg S_028A4C_R700_ZMM_LINE_OFFSET(1) | 52201e04c3fSmrg S_028A4C_R700_VPORT_SCISSOR_ENABLE(1); 523af69d88dSmrg } else { 524af69d88dSmrg sc_mode_cntl |= S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1); 525af69d88dSmrg } 526af69d88dSmrg 527af69d88dSmrg spi_interp = S_0286D4_FLAT_SHADE_ENA(1); 5287ec681f3Smrg spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) | 5297ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_X(2) | 5307ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_Y(3) | 5317ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_Z(0) | 5327ec681f3Smrg S_0286D4_PNT_SPRITE_OVRD_W(1); 5337ec681f3Smrg if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { 5347ec681f3Smrg spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1); 5353464ebd5Sriastradh } 536af69d88dSmrg 537af69d88dSmrg r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3); 538af69d88dSmrg /* point size 12.4 fixed point (divide by two, because 0.5 = 1 pixel. */ 539af69d88dSmrg tmp = r600_pack_float_12p4(state->point_size/2); 540af69d88dSmrg r600_store_value(&rs->buffer, /* R_028A00_PA_SU_POINT_SIZE */ 541af69d88dSmrg S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 542af69d88dSmrg r600_store_value(&rs->buffer, /* R_028A04_PA_SU_POINT_MINMAX */ 543af69d88dSmrg S_028A04_MIN_SIZE(r600_pack_float_12p4(psize_min/2)) | 544af69d88dSmrg S_028A04_MAX_SIZE(r600_pack_float_12p4(psize_max/2))); 545af69d88dSmrg r600_store_value(&rs->buffer, /* R_028A08_PA_SU_LINE_CNTL */ 546af69d88dSmrg S_028A08_WIDTH(r600_pack_float_12p4(state->line_width/2))); 547af69d88dSmrg 548af69d88dSmrg r600_store_context_reg(&rs->buffer, R_0286D4_SPI_INTERP_CONTROL_0, spi_interp); 549af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028A4C_PA_SC_MODE_CNTL, sc_mode_cntl); 550af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028C08_PA_SU_VTX_CNTL, 551af69d88dSmrg S_028C08_PIX_CENTER_HALF(state->half_pixel_center) | 552af69d88dSmrg S_028C08_QUANT_MODE(V_028C08_X_1_256TH)); 553af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 554af69d88dSmrg 555af69d88dSmrg rs->pa_su_sc_mode_cntl = S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 556af69d88dSmrg S_028814_CULL_FRONT(state->cull_face & PIPE_FACE_FRONT ? 1 : 0) | 557af69d88dSmrg S_028814_CULL_BACK(state->cull_face & PIPE_FACE_BACK ? 1 : 0) | 558af69d88dSmrg S_028814_FACE(!state->front_ccw) | 559af69d88dSmrg S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 560af69d88dSmrg S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 561af69d88dSmrg S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 562af69d88dSmrg S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 563af69d88dSmrg state->fill_back != PIPE_POLYGON_MODE_FILL) | 564af69d88dSmrg S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | 565af69d88dSmrg S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)); 566af69d88dSmrg if (rctx->b.chip_class == R700) { 567af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028814_PA_SU_SC_MODE_CNTL, rs->pa_su_sc_mode_cntl); 568af69d88dSmrg } 569af69d88dSmrg if (rctx->b.chip_class == R600) { 570af69d88dSmrg r600_store_context_reg(&rs->buffer, R_028350_SX_MISC, 571af69d88dSmrg S_028350_MULTIPASS(state->rasterizer_discard)); 572af69d88dSmrg } 573af69d88dSmrg return rs; 5743464ebd5Sriastradh} 5753464ebd5Sriastradh 57601e04c3fSmrgstatic unsigned r600_tex_filter(unsigned filter, unsigned max_aniso) 57701e04c3fSmrg{ 57801e04c3fSmrg if (filter == PIPE_TEX_FILTER_LINEAR) 57901e04c3fSmrg return max_aniso > 1 ? V_03C000_SQ_TEX_XY_FILTER_ANISO_BILINEAR 58001e04c3fSmrg : V_03C000_SQ_TEX_XY_FILTER_BILINEAR; 58101e04c3fSmrg else 58201e04c3fSmrg return max_aniso > 1 ? V_03C000_SQ_TEX_XY_FILTER_ANISO_POINT 58301e04c3fSmrg : V_03C000_SQ_TEX_XY_FILTER_POINT; 58401e04c3fSmrg} 58501e04c3fSmrg 5863464ebd5Sriastradhstatic void *r600_create_sampler_state(struct pipe_context *ctx, 5873464ebd5Sriastradh const struct pipe_sampler_state *state) 5883464ebd5Sriastradh{ 58901e04c3fSmrg struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; 5903464ebd5Sriastradh struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state); 59101e04c3fSmrg unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso 59201e04c3fSmrg : state->max_anisotropy; 59301e04c3fSmrg unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 5943464ebd5Sriastradh 59501e04c3fSmrg if (!ss) { 5963464ebd5Sriastradh return NULL; 5973464ebd5Sriastradh } 5983464ebd5Sriastradh 5993464ebd5Sriastradh ss->seamless_cube_map = state->seamless_cube_map; 600af69d88dSmrg ss->border_color_use = sampler_state_needs_border_color(state); 601af69d88dSmrg 602af69d88dSmrg /* R_03C000_SQ_TEX_SAMPLER_WORD0_0 */ 603af69d88dSmrg ss->tex_sampler_words[0] = 604af69d88dSmrg S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | 605af69d88dSmrg S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | 606af69d88dSmrg S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | 60701e04c3fSmrg S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter, max_aniso)) | 60801e04c3fSmrg S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter, max_aniso)) | 609af69d88dSmrg S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | 61001e04c3fSmrg S_03C000_MAX_ANISO_RATIO(max_aniso_ratio) | 611af69d88dSmrg S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | 612af69d88dSmrg S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0); 613af69d88dSmrg /* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */ 614af69d88dSmrg ss->tex_sampler_words[1] = 615af69d88dSmrg S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | 616af69d88dSmrg S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | 617af69d88dSmrg S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); 618af69d88dSmrg /* R_03C008_SQ_TEX_SAMPLER_WORD2_0 */ 619af69d88dSmrg ss->tex_sampler_words[2] = S_03C008_TYPE(1); 620af69d88dSmrg 621af69d88dSmrg if (ss->border_color_use) { 622af69d88dSmrg memcpy(&ss->border_color, &state->border_color, sizeof(state->border_color)); 623af69d88dSmrg } 624af69d88dSmrg return ss; 625af69d88dSmrg} 626af69d88dSmrg 627af69d88dSmrgstatic struct pipe_sampler_view * 628af69d88dSmrgtexture_buffer_sampler_view(struct r600_pipe_sampler_view *view, 629af69d88dSmrg unsigned width0, unsigned height0) 63001e04c3fSmrg 631af69d88dSmrg{ 632af69d88dSmrg struct r600_texture *tmp = (struct r600_texture*)view->base.texture; 633af69d88dSmrg int stride = util_format_get_blocksize(view->base.format); 634af69d88dSmrg unsigned format, num_format, format_comp, endian; 63501e04c3fSmrg uint64_t offset = view->base.u.buf.offset; 63601e04c3fSmrg unsigned size = view->base.u.buf.size; 637af69d88dSmrg 638af69d88dSmrg r600_vertex_data_type(view->base.format, 639af69d88dSmrg &format, &num_format, &format_comp, 640af69d88dSmrg &endian); 641af69d88dSmrg 642af69d88dSmrg view->tex_resource = &tmp->resource; 643af69d88dSmrg view->skip_mip_address_reloc = true; 644af69d88dSmrg 645af69d88dSmrg view->tex_resource_words[0] = offset; 646af69d88dSmrg view->tex_resource_words[1] = size - 1; 647af69d88dSmrg view->tex_resource_words[2] = S_038008_BASE_ADDRESS_HI(offset >> 32UL) | 648af69d88dSmrg S_038008_STRIDE(stride) | 649af69d88dSmrg S_038008_DATA_FORMAT(format) | 650af69d88dSmrg S_038008_NUM_FORMAT_ALL(num_format) | 651af69d88dSmrg S_038008_FORMAT_COMP_ALL(format_comp) | 652af69d88dSmrg S_038008_ENDIAN_SWAP(endian); 653af69d88dSmrg view->tex_resource_words[3] = 0; 654af69d88dSmrg /* 655af69d88dSmrg * in theory dword 4 is for number of elements, for use with resinfo, 656af69d88dSmrg * but it seems to utterly fail to work, the amd gpu shader analyser 657af69d88dSmrg * uses a const buffer to store the element sizes for buffer txq 658af69d88dSmrg */ 659af69d88dSmrg view->tex_resource_words[4] = 0; 660af69d88dSmrg view->tex_resource_words[5] = 0; 661af69d88dSmrg view->tex_resource_words[6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_BUFFER); 662af69d88dSmrg return &view->base; 663af69d88dSmrg} 664af69d88dSmrg 665af69d88dSmrgstruct pipe_sampler_view * 666af69d88dSmrgr600_create_sampler_view_custom(struct pipe_context *ctx, 667af69d88dSmrg struct pipe_resource *texture, 668af69d88dSmrg const struct pipe_sampler_view *state, 669af69d88dSmrg unsigned width_first_level, unsigned height_first_level) 670af69d88dSmrg{ 671af69d88dSmrg struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view); 672af69d88dSmrg struct r600_texture *tmp = (struct r600_texture*)texture; 6733464ebd5Sriastradh unsigned format, endian; 6743464ebd5Sriastradh uint32_t word4 = 0, yuv_format = 0, pitch = 0; 675af69d88dSmrg unsigned char swizzle[4], array_mode = 0; 6763464ebd5Sriastradh unsigned width, height, depth, offset_level, last_level; 67701e04c3fSmrg bool do_endian_swap = FALSE; 6783464ebd5Sriastradh 67901e04c3fSmrg if (!view) 6803464ebd5Sriastradh return NULL; 6813464ebd5Sriastradh 6823464ebd5Sriastradh /* initialize base object */ 683af69d88dSmrg view->base = *state; 684af69d88dSmrg view->base.texture = NULL; 6853464ebd5Sriastradh pipe_reference(NULL, &texture->reference); 686af69d88dSmrg view->base.texture = texture; 687af69d88dSmrg view->base.reference.count = 1; 688af69d88dSmrg view->base.context = ctx; 689af69d88dSmrg 690af69d88dSmrg if (texture->target == PIPE_BUFFER) 691af69d88dSmrg return texture_buffer_sampler_view(view, texture->width0, 1); 6923464ebd5Sriastradh 6933464ebd5Sriastradh swizzle[0] = state->swizzle_r; 6943464ebd5Sriastradh swizzle[1] = state->swizzle_g; 6953464ebd5Sriastradh swizzle[2] = state->swizzle_b; 6963464ebd5Sriastradh swizzle[3] = state->swizzle_a; 697af69d88dSmrg 69801e04c3fSmrg if (R600_BIG_ENDIAN) 69901e04c3fSmrg do_endian_swap = !tmp->db_compatible; 70001e04c3fSmrg 7013464ebd5Sriastradh format = r600_translate_texformat(ctx->screen, state->format, 7023464ebd5Sriastradh swizzle, 70301e04c3fSmrg &word4, &yuv_format, do_endian_swap); 704af69d88dSmrg assert(format != ~0); 7053464ebd5Sriastradh if (format == ~0) { 706af69d88dSmrg FREE(view); 707af69d88dSmrg return NULL; 7083464ebd5Sriastradh } 709af69d88dSmrg 71001e04c3fSmrg if (state->format == PIPE_FORMAT_X24S8_UINT || 71101e04c3fSmrg state->format == PIPE_FORMAT_S8X24_UINT || 71201e04c3fSmrg state->format == PIPE_FORMAT_X32_S8X24_UINT || 71301e04c3fSmrg state->format == PIPE_FORMAT_S8_UINT) 71401e04c3fSmrg view->is_stencil_sampler = true; 71501e04c3fSmrg 71601e04c3fSmrg if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) { 717af69d88dSmrg if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) { 718af69d88dSmrg FREE(view); 719af69d88dSmrg return NULL; 720af69d88dSmrg } 7213464ebd5Sriastradh tmp = tmp->flushed_depth_texture; 7223464ebd5Sriastradh } 7233464ebd5Sriastradh 72401e04c3fSmrg endian = r600_colorformat_endian_swap(format, do_endian_swap); 7253464ebd5Sriastradh 7263464ebd5Sriastradh offset_level = state->u.tex.first_level; 7273464ebd5Sriastradh last_level = state->u.tex.last_level - offset_level; 728af69d88dSmrg width = width_first_level; 729af69d88dSmrg height = height_first_level; 730af69d88dSmrg depth = u_minify(texture->depth0, offset_level); 73101e04c3fSmrg pitch = tmp->surface.u.legacy.level[offset_level].nblk_x * util_format_get_blockwidth(state->format); 7323464ebd5Sriastradh 7333464ebd5Sriastradh if (texture->target == PIPE_TEXTURE_1D_ARRAY) { 734af69d88dSmrg height = 1; 7353464ebd5Sriastradh depth = texture->array_size; 7363464ebd5Sriastradh } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { 7373464ebd5Sriastradh depth = texture->array_size; 738af69d88dSmrg } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) 739af69d88dSmrg depth = texture->array_size / 6; 74001e04c3fSmrg 74101e04c3fSmrg switch (tmp->surface.u.legacy.level[offset_level].mode) { 74201e04c3fSmrg default: 743af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 744af69d88dSmrg array_mode = V_038000_ARRAY_LINEAR_ALIGNED; 745af69d88dSmrg break; 746af69d88dSmrg case RADEON_SURF_MODE_1D: 747af69d88dSmrg array_mode = V_038000_ARRAY_1D_TILED_THIN1; 748af69d88dSmrg break; 749af69d88dSmrg case RADEON_SURF_MODE_2D: 750af69d88dSmrg array_mode = V_038000_ARRAY_2D_TILED_THIN1; 751af69d88dSmrg break; 7523464ebd5Sriastradh } 7533464ebd5Sriastradh 754af69d88dSmrg view->tex_resource = &tmp->resource; 755af69d88dSmrg view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) | 756af69d88dSmrg S_038000_TILE_MODE(array_mode) | 757af69d88dSmrg S_038000_TILE_TYPE(tmp->non_disp_tiling) | 758af69d88dSmrg S_038000_PITCH((pitch / 8) - 1) | 759af69d88dSmrg S_038000_TEX_WIDTH(width - 1)); 760af69d88dSmrg view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) | 761af69d88dSmrg S_038004_TEX_DEPTH(depth - 1) | 762af69d88dSmrg S_038004_DATA_FORMAT(format)); 7637ec681f3Smrg view->tex_resource_words[2] = tmp->surface.u.legacy.level[offset_level].offset_256B; 76401e04c3fSmrg if (offset_level >= tmp->resource.b.b.last_level) { 7657ec681f3Smrg view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level].offset_256B; 766af69d88dSmrg } else { 7677ec681f3Smrg view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level + 1].offset_256B; 7683464ebd5Sriastradh } 769af69d88dSmrg view->tex_resource_words[4] = (word4 | 770af69d88dSmrg S_038010_REQUEST_SIZE(1) | 771af69d88dSmrg S_038010_ENDIAN_SWAP(endian) | 772af69d88dSmrg S_038010_BASE_LEVEL(0)); 773af69d88dSmrg view->tex_resource_words[5] = (S_038014_BASE_ARRAY(state->u.tex.first_layer) | 774af69d88dSmrg S_038014_LAST_ARRAY(state->u.tex.last_layer)); 775af69d88dSmrg if (texture->nr_samples > 1) { 776af69d88dSmrg /* LAST_LEVEL holds log2(nr_samples) for multisample textures */ 777af69d88dSmrg view->tex_resource_words[5] |= S_038014_LAST_LEVEL(util_logbase2(texture->nr_samples)); 778af69d88dSmrg } else { 779af69d88dSmrg view->tex_resource_words[5] |= S_038014_LAST_LEVEL(last_level); 780af69d88dSmrg } 781af69d88dSmrg view->tex_resource_words[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | 782af69d88dSmrg S_038018_MAX_ANISO(4 /* max 16 samples */)); 783af69d88dSmrg return &view->base; 784af69d88dSmrg} 785af69d88dSmrg 786af69d88dSmrgstatic struct pipe_sampler_view * 787af69d88dSmrgr600_create_sampler_view(struct pipe_context *ctx, 788af69d88dSmrg struct pipe_resource *tex, 789af69d88dSmrg const struct pipe_sampler_view *state) 790af69d88dSmrg{ 791af69d88dSmrg return r600_create_sampler_view_custom(ctx, tex, state, 792af69d88dSmrg u_minify(tex->width0, state->u.tex.first_level), 793af69d88dSmrg u_minify(tex->height0, state->u.tex.first_level)); 7943464ebd5Sriastradh} 7953464ebd5Sriastradh 796af69d88dSmrgstatic void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) 7973464ebd5Sriastradh{ 7987ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 799af69d88dSmrg struct pipe_clip_state *state = &rctx->clip_state.state; 800af69d88dSmrg 80101e04c3fSmrg radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4); 802af69d88dSmrg radeon_emit_array(cs, (unsigned*)state, 6*4); 803af69d88dSmrg} 804af69d88dSmrg 805af69d88dSmrgstatic void r600_set_polygon_stipple(struct pipe_context *ctx, 806af69d88dSmrg const struct pipe_poly_stipple *state) 807af69d88dSmrg{ 808af69d88dSmrg} 809af69d88dSmrg 810af69d88dSmrgstatic void r600_init_color_surface(struct r600_context *rctx, 811af69d88dSmrg struct r600_surface *surf, 812af69d88dSmrg bool force_cmask_fmask) 813af69d88dSmrg{ 814af69d88dSmrg struct r600_screen *rscreen = rctx->screen; 815af69d88dSmrg struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 816af69d88dSmrg unsigned level = surf->base.u.tex.level; 817af69d88dSmrg unsigned pitch, slice; 818af69d88dSmrg unsigned color_info; 819af69d88dSmrg unsigned color_view; 820af69d88dSmrg unsigned format, swap, ntype, endian; 821af69d88dSmrg unsigned offset; 822af69d88dSmrg const struct util_format_description *desc; 823af69d88dSmrg int i; 82401e04c3fSmrg bool blend_bypass = 0, blend_clamp = 0, do_endian_swap = FALSE; 825af69d88dSmrg 82601e04c3fSmrg if (rtex->db_compatible && !r600_can_sample_zs(rtex, false)) { 827af69d88dSmrg r600_init_flushed_depth_texture(&rctx->b.b, surf->base.texture, NULL); 828af69d88dSmrg rtex = rtex->flushed_depth_texture; 829af69d88dSmrg assert(rtex); 830af69d88dSmrg } 831af69d88dSmrg 8327ec681f3Smrg offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; 83301e04c3fSmrg color_view = S_028080_SLICE_START(surf->base.u.tex.first_layer) | 83401e04c3fSmrg S_028080_SLICE_MAX(surf->base.u.tex.last_layer); 835af69d88dSmrg 83601e04c3fSmrg pitch = rtex->surface.u.legacy.level[level].nblk_x / 8 - 1; 83701e04c3fSmrg slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64; 838af69d88dSmrg if (slice) { 839af69d88dSmrg slice = slice - 1; 840af69d88dSmrg } 841af69d88dSmrg color_info = 0; 84201e04c3fSmrg switch (rtex->surface.u.legacy.level[level].mode) { 84301e04c3fSmrg default: 844af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 845af69d88dSmrg color_info = S_0280A0_ARRAY_MODE(V_038000_ARRAY_LINEAR_ALIGNED); 846af69d88dSmrg break; 847af69d88dSmrg case RADEON_SURF_MODE_1D: 848af69d88dSmrg color_info = S_0280A0_ARRAY_MODE(V_038000_ARRAY_1D_TILED_THIN1); 849af69d88dSmrg break; 850af69d88dSmrg case RADEON_SURF_MODE_2D: 851af69d88dSmrg color_info = S_0280A0_ARRAY_MODE(V_038000_ARRAY_2D_TILED_THIN1); 852af69d88dSmrg break; 853af69d88dSmrg } 854af69d88dSmrg 855af69d88dSmrg desc = util_format_description(surf->base.format); 856af69d88dSmrg 857af69d88dSmrg for (i = 0; i < 4; i++) { 858af69d88dSmrg if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 859af69d88dSmrg break; 860af69d88dSmrg } 861af69d88dSmrg } 862af69d88dSmrg 863af69d88dSmrg ntype = V_0280A0_NUMBER_UNORM; 864af69d88dSmrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 865af69d88dSmrg ntype = V_0280A0_NUMBER_SRGB; 866af69d88dSmrg else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 867af69d88dSmrg if (desc->channel[i].normalized) 868af69d88dSmrg ntype = V_0280A0_NUMBER_SNORM; 869af69d88dSmrg else if (desc->channel[i].pure_integer) 870af69d88dSmrg ntype = V_0280A0_NUMBER_SINT; 871af69d88dSmrg } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 872af69d88dSmrg if (desc->channel[i].normalized) 873af69d88dSmrg ntype = V_0280A0_NUMBER_UNORM; 874af69d88dSmrg else if (desc->channel[i].pure_integer) 875af69d88dSmrg ntype = V_0280A0_NUMBER_UINT; 87601e04c3fSmrg } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 87701e04c3fSmrg ntype = V_0280A0_NUMBER_FLOAT; 878af69d88dSmrg } 879af69d88dSmrg 88001e04c3fSmrg if (R600_BIG_ENDIAN) 88101e04c3fSmrg do_endian_swap = !rtex->db_compatible; 88201e04c3fSmrg 88301e04c3fSmrg format = r600_translate_colorformat(rctx->b.chip_class, surf->base.format, 88401e04c3fSmrg do_endian_swap); 885af69d88dSmrg assert(format != ~0); 886af69d88dSmrg 88701e04c3fSmrg swap = r600_translate_colorswap(surf->base.format, do_endian_swap); 888af69d88dSmrg assert(swap != ~0); 889af69d88dSmrg 89001e04c3fSmrg endian = r600_colorformat_endian_swap(format, do_endian_swap); 89101e04c3fSmrg 89201e04c3fSmrg /* blend clamp should be set for all NORM/SRGB types */ 89301e04c3fSmrg if (ntype == V_0280A0_NUMBER_UNORM || ntype == V_0280A0_NUMBER_SNORM || 89401e04c3fSmrg ntype == V_0280A0_NUMBER_SRGB) 89501e04c3fSmrg blend_clamp = 1; 896af69d88dSmrg 897af69d88dSmrg /* set blend bypass according to docs if SINT/UINT or 898af69d88dSmrg 8/24 COLOR variants */ 899af69d88dSmrg if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT || 900af69d88dSmrg format == V_0280A0_COLOR_8_24 || format == V_0280A0_COLOR_24_8 || 901af69d88dSmrg format == V_0280A0_COLOR_X24_8_32_FLOAT) { 902af69d88dSmrg blend_clamp = 0; 903af69d88dSmrg blend_bypass = 1; 904af69d88dSmrg } 905af69d88dSmrg 906af69d88dSmrg surf->alphatest_bypass = ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT; 907af69d88dSmrg 908af69d88dSmrg color_info |= S_0280A0_FORMAT(format) | 909af69d88dSmrg S_0280A0_COMP_SWAP(swap) | 910af69d88dSmrg S_0280A0_BLEND_BYPASS(blend_bypass) | 911af69d88dSmrg S_0280A0_BLEND_CLAMP(blend_clamp) | 91201e04c3fSmrg S_0280A0_SIMPLE_FLOAT(1) | 913af69d88dSmrg S_0280A0_NUMBER_TYPE(ntype) | 914af69d88dSmrg S_0280A0_ENDIAN(endian); 915af69d88dSmrg 9167ec681f3Smrg /* EXPORT_NORM is an optimization that can be enabled for better 917af69d88dSmrg * performance in certain cases 918af69d88dSmrg */ 919af69d88dSmrg if (rctx->b.chip_class == R600) { 920af69d88dSmrg /* EXPORT_NORM can be enabled if: 921af69d88dSmrg * - 11-bit or smaller UNORM/SNORM/SRGB 922af69d88dSmrg * - BLEND_CLAMP is enabled 923af69d88dSmrg * - BLEND_FLOAT32 is disabled 924af69d88dSmrg */ 925af69d88dSmrg if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && 926af69d88dSmrg (desc->channel[i].size < 12 && 927af69d88dSmrg desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && 928af69d88dSmrg ntype != V_0280A0_NUMBER_UINT && 929af69d88dSmrg ntype != V_0280A0_NUMBER_SINT) && 930af69d88dSmrg G_0280A0_BLEND_CLAMP(color_info) && 93101e04c3fSmrg /* XXX this condition is always true since BLEND_FLOAT32 is never set (bug?). */ 932af69d88dSmrg !G_0280A0_BLEND_FLOAT32(color_info)) { 933af69d88dSmrg color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); 934af69d88dSmrg surf->export_16bpc = true; 935af69d88dSmrg } 936af69d88dSmrg } else { 937af69d88dSmrg /* EXPORT_NORM can be enabled if: 938af69d88dSmrg * - 11-bit or smaller UNORM/SNORM/SRGB 939af69d88dSmrg * - 16-bit or smaller FLOAT 940af69d88dSmrg */ 941af69d88dSmrg if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && 942af69d88dSmrg ((desc->channel[i].size < 12 && 943af69d88dSmrg desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && 944af69d88dSmrg ntype != V_0280A0_NUMBER_UINT && ntype != V_0280A0_NUMBER_SINT) || 945af69d88dSmrg (desc->channel[i].size < 17 && 946af69d88dSmrg desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))) { 947af69d88dSmrg color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); 948af69d88dSmrg surf->export_16bpc = true; 9493464ebd5Sriastradh } 9503464ebd5Sriastradh } 951af69d88dSmrg 952af69d88dSmrg /* These might not always be initialized to zero. */ 953af69d88dSmrg surf->cb_color_base = offset >> 8; 954af69d88dSmrg surf->cb_color_size = S_028060_PITCH_TILE_MAX(pitch) | 955af69d88dSmrg S_028060_SLICE_TILE_MAX(slice); 956af69d88dSmrg surf->cb_color_fmask = surf->cb_color_base; 957af69d88dSmrg surf->cb_color_cmask = surf->cb_color_base; 958af69d88dSmrg surf->cb_color_mask = 0; 959af69d88dSmrg 96001e04c3fSmrg r600_resource_reference(&surf->cb_buffer_cmask, &rtex->resource); 96101e04c3fSmrg r600_resource_reference(&surf->cb_buffer_fmask, &rtex->resource); 962af69d88dSmrg 963af69d88dSmrg if (rtex->cmask.size) { 964af69d88dSmrg surf->cb_color_cmask = rtex->cmask.offset >> 8; 965af69d88dSmrg surf->cb_color_mask |= S_028100_CMASK_BLOCK_MAX(rtex->cmask.slice_tile_max); 966af69d88dSmrg 967af69d88dSmrg if (rtex->fmask.size) { 968af69d88dSmrg color_info |= S_0280A0_TILE_MODE(V_0280A0_FRAG_ENABLE); 969af69d88dSmrg surf->cb_color_fmask = rtex->fmask.offset >> 8; 970af69d88dSmrg surf->cb_color_mask |= S_028100_FMASK_TILE_MAX(rtex->fmask.slice_tile_max); 971af69d88dSmrg } else { /* cmask only */ 972af69d88dSmrg color_info |= S_0280A0_TILE_MODE(V_0280A0_CLEAR_ENABLE); 9733464ebd5Sriastradh } 974af69d88dSmrg } else if (force_cmask_fmask) { 975af69d88dSmrg /* Allocate dummy FMASK and CMASK if they aren't allocated already. 976af69d88dSmrg * 977af69d88dSmrg * R6xx needs FMASK and CMASK for the destination buffer of color resolve, 978af69d88dSmrg * otherwise it hangs. We don't have FMASK and CMASK pre-allocated, 979af69d88dSmrg * because it's not an MSAA buffer. 980af69d88dSmrg */ 981af69d88dSmrg struct r600_cmask_info cmask; 982af69d88dSmrg struct r600_fmask_info fmask; 983af69d88dSmrg 984af69d88dSmrg r600_texture_get_cmask_info(&rscreen->b, rtex, &cmask); 985af69d88dSmrg r600_texture_get_fmask_info(&rscreen->b, rtex, 8, &fmask); 986af69d88dSmrg 987af69d88dSmrg /* CMASK. */ 988af69d88dSmrg if (!rctx->dummy_cmask || 98901e04c3fSmrg rctx->dummy_cmask->b.b.width0 < cmask.size || 9907ec681f3Smrg (1 << rctx->dummy_cmask->buf->alignment_log2) % cmask.alignment != 0) { 991af69d88dSmrg struct pipe_transfer *transfer; 992af69d88dSmrg void *ptr; 993af69d88dSmrg 99401e04c3fSmrg r600_resource_reference(&rctx->dummy_cmask, NULL); 99501e04c3fSmrg rctx->dummy_cmask = (struct r600_resource*) 99601e04c3fSmrg r600_aligned_buffer_create(&rscreen->b.b, 0, 99701e04c3fSmrg PIPE_USAGE_DEFAULT, 99801e04c3fSmrg cmask.size, cmask.alignment); 99901e04c3fSmrg 100001e04c3fSmrg if (unlikely(!rctx->dummy_cmask)) { 100101e04c3fSmrg surf->color_initialized = false; 100201e04c3fSmrg return; 100301e04c3fSmrg } 1004af69d88dSmrg 1005af69d88dSmrg /* Set the contents to 0xCC. */ 10067ec681f3Smrg ptr = pipe_buffer_map(&rctx->b.b, &rctx->dummy_cmask->b.b, PIPE_MAP_WRITE, &transfer); 1007af69d88dSmrg memset(ptr, 0xCC, cmask.size); 1008af69d88dSmrg pipe_buffer_unmap(&rctx->b.b, transfer); 1009af69d88dSmrg } 101001e04c3fSmrg r600_resource_reference(&surf->cb_buffer_cmask, rctx->dummy_cmask); 1011af69d88dSmrg 1012af69d88dSmrg /* FMASK. */ 1013af69d88dSmrg if (!rctx->dummy_fmask || 101401e04c3fSmrg rctx->dummy_fmask->b.b.width0 < fmask.size || 10157ec681f3Smrg (1 << rctx->dummy_fmask->buf->alignment_log2) % fmask.alignment != 0) { 101601e04c3fSmrg r600_resource_reference(&rctx->dummy_fmask, NULL); 101701e04c3fSmrg rctx->dummy_fmask = (struct r600_resource*) 101801e04c3fSmrg r600_aligned_buffer_create(&rscreen->b.b, 0, 101901e04c3fSmrg PIPE_USAGE_DEFAULT, 102001e04c3fSmrg fmask.size, fmask.alignment); 1021af69d88dSmrg 102201e04c3fSmrg if (unlikely(!rctx->dummy_fmask)) { 102301e04c3fSmrg surf->color_initialized = false; 102401e04c3fSmrg return; 102501e04c3fSmrg } 1026af69d88dSmrg } 102701e04c3fSmrg r600_resource_reference(&surf->cb_buffer_fmask, rctx->dummy_fmask); 1028af69d88dSmrg 1029af69d88dSmrg /* Init the registers. */ 1030af69d88dSmrg color_info |= S_0280A0_TILE_MODE(V_0280A0_FRAG_ENABLE); 1031af69d88dSmrg surf->cb_color_cmask = 0; 1032af69d88dSmrg surf->cb_color_fmask = 0; 1033af69d88dSmrg surf->cb_color_mask = S_028100_CMASK_BLOCK_MAX(cmask.slice_tile_max) | 1034af69d88dSmrg S_028100_FMASK_TILE_MAX(fmask.slice_tile_max); 10353464ebd5Sriastradh } 1036af69d88dSmrg 1037af69d88dSmrg surf->cb_color_info = color_info; 1038af69d88dSmrg surf->cb_color_view = color_view; 1039af69d88dSmrg surf->color_initialized = true; 10403464ebd5Sriastradh} 10413464ebd5Sriastradh 1042af69d88dSmrgstatic void r600_init_depth_surface(struct r600_context *rctx, 1043af69d88dSmrg struct r600_surface *surf) 10443464ebd5Sriastradh{ 1045af69d88dSmrg struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1046af69d88dSmrg unsigned level, pitch, slice, format, offset, array_mode; 10473464ebd5Sriastradh 1048af69d88dSmrg level = surf->base.u.tex.level; 10497ec681f3Smrg offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; 105001e04c3fSmrg pitch = rtex->surface.u.legacy.level[level].nblk_x / 8 - 1; 105101e04c3fSmrg slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64; 1052af69d88dSmrg if (slice) { 1053af69d88dSmrg slice = slice - 1; 1054af69d88dSmrg } 105501e04c3fSmrg switch (rtex->surface.u.legacy.level[level].mode) { 1056af69d88dSmrg case RADEON_SURF_MODE_2D: 1057af69d88dSmrg array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; 1058af69d88dSmrg break; 1059af69d88dSmrg case RADEON_SURF_MODE_1D: 1060af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 1061af69d88dSmrg default: 1062af69d88dSmrg array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; 1063af69d88dSmrg break; 1064af69d88dSmrg } 10653464ebd5Sriastradh 1066af69d88dSmrg format = r600_translate_dbformat(surf->base.format); 1067af69d88dSmrg assert(format != ~0); 1068af69d88dSmrg 1069af69d88dSmrg surf->db_depth_info = S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format); 1070af69d88dSmrg surf->db_depth_base = offset >> 8; 1071af69d88dSmrg surf->db_depth_view = S_028004_SLICE_START(surf->base.u.tex.first_layer) | 1072af69d88dSmrg S_028004_SLICE_MAX(surf->base.u.tex.last_layer); 1073af69d88dSmrg surf->db_depth_size = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice); 107401e04c3fSmrg surf->db_prefetch_limit = (rtex->surface.u.legacy.level[level].nblk_y / 8) - 1; 1075af69d88dSmrg 107601e04c3fSmrg if (r600_htile_enabled(rtex, level)) { 107701e04c3fSmrg surf->db_htile_data_base = rtex->htile_offset >> 8; 1078af69d88dSmrg surf->db_htile_surface = S_028D24_HTILE_WIDTH(1) | 107901e04c3fSmrg S_028D24_HTILE_HEIGHT(1) | 108001e04c3fSmrg S_028D24_FULL_CACHE(1); 1081af69d88dSmrg /* preload is not working properly on r6xx/r7xx */ 1082af69d88dSmrg surf->db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1); 1083af69d88dSmrg } 1084af69d88dSmrg 1085af69d88dSmrg surf->depth_initialized = true; 10863464ebd5Sriastradh} 10873464ebd5Sriastradh 1088af69d88dSmrgstatic void r600_set_framebuffer_state(struct pipe_context *ctx, 1089af69d88dSmrg const struct pipe_framebuffer_state *state) 10903464ebd5Sriastradh{ 1091af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 1092af69d88dSmrg struct r600_surface *surf; 1093af69d88dSmrg struct r600_texture *rtex; 1094af69d88dSmrg unsigned i; 109501e04c3fSmrg uint32_t target_mask = 0; 1096af69d88dSmrg 109701e04c3fSmrg /* Flush TC when changing the framebuffer state, because the only 109801e04c3fSmrg * client not using TC that can change textures is the framebuffer. 109901e04c3fSmrg * Other places don't typically have to flush TC. 110001e04c3fSmrg */ 110101e04c3fSmrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | 110201e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV | 110301e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_CB | 110401e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_CB_META | 110501e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_DB | 110601e04c3fSmrg R600_CONTEXT_FLUSH_AND_INV_DB_META | 110701e04c3fSmrg R600_CONTEXT_INV_TEX_CACHE; 1108af69d88dSmrg 1109af69d88dSmrg /* Set the new state. */ 1110af69d88dSmrg util_copy_framebuffer_state(&rctx->framebuffer.state, state); 1111af69d88dSmrg 1112af69d88dSmrg rctx->framebuffer.export_16bpc = state->nr_cbufs != 0; 1113af69d88dSmrg rctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 1114af69d88dSmrg util_format_is_pure_integer(state->cbufs[0]->format); 1115af69d88dSmrg rctx->framebuffer.compressed_cb_mask = 0; 1116af69d88dSmrg rctx->framebuffer.is_msaa_resolve = state->nr_cbufs == 2 && 1117af69d88dSmrg state->cbufs[0] && state->cbufs[1] && 1118af69d88dSmrg state->cbufs[0]->texture->nr_samples > 1 && 1119af69d88dSmrg state->cbufs[1]->texture->nr_samples <= 1; 1120af69d88dSmrg rctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 1121af69d88dSmrg 1122af69d88dSmrg /* Colorbuffers. */ 1123af69d88dSmrg for (i = 0; i < state->nr_cbufs; i++) { 1124af69d88dSmrg /* The resolve buffer must have CMASK and FMASK to prevent hardlocks on R6xx. */ 1125af69d88dSmrg bool force_cmask_fmask = rctx->b.chip_class == R600 && 1126af69d88dSmrg rctx->framebuffer.is_msaa_resolve && 1127af69d88dSmrg i == 1; 1128af69d88dSmrg 1129af69d88dSmrg surf = (struct r600_surface*)state->cbufs[i]; 1130af69d88dSmrg if (!surf) 1131af69d88dSmrg continue; 1132af69d88dSmrg 1133af69d88dSmrg rtex = (struct r600_texture*)surf->base.texture; 1134af69d88dSmrg r600_context_add_resource_size(ctx, state->cbufs[i]->texture); 1135af69d88dSmrg 113601e04c3fSmrg target_mask |= (0xf << (i * 4)); 113701e04c3fSmrg 1138af69d88dSmrg if (!surf->color_initialized || force_cmask_fmask) { 1139af69d88dSmrg r600_init_color_surface(rctx, surf, force_cmask_fmask); 1140af69d88dSmrg if (force_cmask_fmask) { 1141af69d88dSmrg /* re-initialize later without compression */ 1142af69d88dSmrg surf->color_initialized = false; 1143af69d88dSmrg } 1144af69d88dSmrg } 1145af69d88dSmrg 1146af69d88dSmrg if (!surf->export_16bpc) { 1147af69d88dSmrg rctx->framebuffer.export_16bpc = false; 1148af69d88dSmrg } 1149af69d88dSmrg 115001e04c3fSmrg if (rtex->fmask.size) { 1151af69d88dSmrg rctx->framebuffer.compressed_cb_mask |= 1 << i; 1152af69d88dSmrg } 1153af69d88dSmrg } 1154af69d88dSmrg 1155af69d88dSmrg /* Update alpha-test state dependencies. 1156af69d88dSmrg * Alpha-test is done on the first colorbuffer only. */ 1157af69d88dSmrg if (state->nr_cbufs) { 1158af69d88dSmrg bool alphatest_bypass = false; 11593464ebd5Sriastradh 1160af69d88dSmrg surf = (struct r600_surface*)state->cbufs[0]; 1161af69d88dSmrg if (surf) { 1162af69d88dSmrg alphatest_bypass = surf->alphatest_bypass; 1163af69d88dSmrg } 11643464ebd5Sriastradh 1165af69d88dSmrg if (rctx->alphatest_state.bypass != alphatest_bypass) { 1166af69d88dSmrg rctx->alphatest_state.bypass = alphatest_bypass; 116701e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1168af69d88dSmrg } 1169af69d88dSmrg } 1170af69d88dSmrg 1171af69d88dSmrg /* ZS buffer. */ 1172af69d88dSmrg if (state->zsbuf) { 1173af69d88dSmrg surf = (struct r600_surface*)state->zsbuf; 11743464ebd5Sriastradh 1175af69d88dSmrg r600_context_add_resource_size(ctx, state->zsbuf->texture); 1176af69d88dSmrg 1177af69d88dSmrg if (!surf->depth_initialized) { 1178af69d88dSmrg r600_init_depth_surface(rctx, surf); 1179af69d88dSmrg } 1180af69d88dSmrg 1181af69d88dSmrg if (state->zsbuf->format != rctx->poly_offset_state.zs_format) { 1182af69d88dSmrg rctx->poly_offset_state.zs_format = state->zsbuf->format; 118301e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom); 1184af69d88dSmrg } 1185af69d88dSmrg 1186af69d88dSmrg if (rctx->db_state.rsurf != surf) { 1187af69d88dSmrg rctx->db_state.rsurf = surf; 118801e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_state.atom); 118901e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 1190af69d88dSmrg } 1191af69d88dSmrg } else if (rctx->db_state.rsurf) { 1192af69d88dSmrg rctx->db_state.rsurf = NULL; 119301e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_state.atom); 119401e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 11953464ebd5Sriastradh } 11963464ebd5Sriastradh 119701e04c3fSmrg if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs || 119801e04c3fSmrg rctx->cb_misc_state.bound_cbufs_target_mask != target_mask) { 119901e04c3fSmrg rctx->cb_misc_state.bound_cbufs_target_mask = target_mask; 1200af69d88dSmrg rctx->cb_misc_state.nr_cbufs = state->nr_cbufs; 120101e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); 1202af69d88dSmrg } 1203af69d88dSmrg 1204af69d88dSmrg if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) { 1205af69d88dSmrg rctx->alphatest_state.bypass = false; 120601e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); 1207af69d88dSmrg } 1208af69d88dSmrg 1209af69d88dSmrg /* Calculate the CS size. */ 1210af69d88dSmrg rctx->framebuffer.atom.num_dw = 1211af69d88dSmrg 10 /*COLOR_INFO*/ + 4 /*SCISSOR*/ + 3 /*SHADER_CONTROL*/ + 8 /*MSAA*/; 1212af69d88dSmrg 1213af69d88dSmrg if (rctx->framebuffer.state.nr_cbufs) { 1214af69d88dSmrg rctx->framebuffer.atom.num_dw += 15 * rctx->framebuffer.state.nr_cbufs; 1215af69d88dSmrg rctx->framebuffer.atom.num_dw += 3 * (2 + rctx->framebuffer.state.nr_cbufs); 1216af69d88dSmrg } 1217af69d88dSmrg if (rctx->framebuffer.state.zsbuf) { 1218af69d88dSmrg rctx->framebuffer.atom.num_dw += 16; 1219af69d88dSmrg } else if (rctx->screen->b.info.drm_minor >= 18) { 1220af69d88dSmrg rctx->framebuffer.atom.num_dw += 3; 1221af69d88dSmrg } 1222af69d88dSmrg if (rctx->b.family > CHIP_R600 && rctx->b.family < CHIP_RV770) { 1223af69d88dSmrg rctx->framebuffer.atom.num_dw += 2; 1224af69d88dSmrg } 1225af69d88dSmrg 122601e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); 122701e04c3fSmrg 122801e04c3fSmrg r600_set_sample_locations_constant_buffer(rctx); 122901e04c3fSmrg rctx->framebuffer.do_update_surf_dirtiness = true; 12303464ebd5Sriastradh} 12313464ebd5Sriastradh 123201e04c3fSmrgstatic const uint32_t sample_locs_2x[] = { 1233af69d88dSmrg FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), 1234af69d88dSmrg FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), 1235af69d88dSmrg}; 123601e04c3fSmrgstatic const unsigned max_dist_2x = 4; 1237af69d88dSmrg 123801e04c3fSmrgstatic const uint32_t sample_locs_4x[] = { 1239af69d88dSmrg FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), 1240af69d88dSmrg FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), 1241af69d88dSmrg}; 124201e04c3fSmrgstatic const unsigned max_dist_4x = 6; 124301e04c3fSmrgstatic const uint32_t sample_locs_8x[] = { 1244af69d88dSmrg FILL_SREG(-1, 1, 1, 5, 3, -5, 5, 3), 1245af69d88dSmrg FILL_SREG(-7, -1, -3, -7, 7, -3, -5, 7), 1246af69d88dSmrg}; 124701e04c3fSmrgstatic const unsigned max_dist_8x = 7; 1248af69d88dSmrg 1249af69d88dSmrgstatic void r600_get_sample_position(struct pipe_context *ctx, 1250af69d88dSmrg unsigned sample_count, 1251af69d88dSmrg unsigned sample_index, 1252af69d88dSmrg float *out_value) 12533464ebd5Sriastradh{ 1254af69d88dSmrg int offset, index; 1255af69d88dSmrg struct { 1256af69d88dSmrg int idx:4; 1257af69d88dSmrg } val; 1258af69d88dSmrg switch (sample_count) { 1259af69d88dSmrg case 1: 1260af69d88dSmrg default: 1261af69d88dSmrg out_value[0] = out_value[1] = 0.5; 1262af69d88dSmrg break; 1263af69d88dSmrg case 2: 1264af69d88dSmrg offset = 4 * (sample_index * 2); 1265af69d88dSmrg val.idx = (sample_locs_2x[0] >> offset) & 0xf; 1266af69d88dSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1267af69d88dSmrg val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf; 1268af69d88dSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1269af69d88dSmrg break; 1270af69d88dSmrg case 4: 1271af69d88dSmrg offset = 4 * (sample_index * 2); 1272af69d88dSmrg val.idx = (sample_locs_4x[0] >> offset) & 0xf; 1273af69d88dSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1274af69d88dSmrg val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf; 1275af69d88dSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1276af69d88dSmrg break; 1277af69d88dSmrg case 8: 1278af69d88dSmrg offset = 4 * (sample_index % 4 * 2); 1279af69d88dSmrg index = (sample_index / 4); 1280af69d88dSmrg val.idx = (sample_locs_8x[index] >> offset) & 0xf; 1281af69d88dSmrg out_value[0] = (float)(val.idx + 8) / 16.0f; 1282af69d88dSmrg val.idx = (sample_locs_8x[index] >> (offset + 4)) & 0xf; 1283af69d88dSmrg out_value[1] = (float)(val.idx + 8) / 16.0f; 1284af69d88dSmrg break; 1285af69d88dSmrg } 1286af69d88dSmrg} 12873464ebd5Sriastradh 1288af69d88dSmrgstatic void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) 1289af69d88dSmrg{ 12907ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1291af69d88dSmrg unsigned max_dist = 0; 12923464ebd5Sriastradh 1293af69d88dSmrg if (rctx->b.family == CHIP_R600) { 1294af69d88dSmrg switch (nr_samples) { 1295af69d88dSmrg default: 1296af69d88dSmrg nr_samples = 0; 1297af69d88dSmrg break; 1298af69d88dSmrg case 2: 129901e04c3fSmrg radeon_set_config_reg(cs, R_008B40_PA_SC_AA_SAMPLE_LOCS_2S, sample_locs_2x[0]); 1300af69d88dSmrg max_dist = max_dist_2x; 1301af69d88dSmrg break; 1302af69d88dSmrg case 4: 130301e04c3fSmrg radeon_set_config_reg(cs, R_008B44_PA_SC_AA_SAMPLE_LOCS_4S, sample_locs_4x[0]); 1304af69d88dSmrg max_dist = max_dist_4x; 1305af69d88dSmrg break; 1306af69d88dSmrg case 8: 130701e04c3fSmrg radeon_set_config_reg_seq(cs, R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0, 2); 1308af69d88dSmrg radeon_emit(cs, sample_locs_8x[0]); /* R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0 */ 1309af69d88dSmrg radeon_emit(cs, sample_locs_8x[1]); /* R_008B4C_PA_SC_AA_SAMPLE_LOCS_8S_WD1 */ 1310af69d88dSmrg max_dist = max_dist_8x; 1311af69d88dSmrg break; 1312af69d88dSmrg } 1313af69d88dSmrg } else { 1314af69d88dSmrg switch (nr_samples) { 1315af69d88dSmrg default: 131601e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2); 1317af69d88dSmrg radeon_emit(cs, 0); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */ 1318af69d88dSmrg radeon_emit(cs, 0); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */ 1319af69d88dSmrg nr_samples = 0; 1320af69d88dSmrg break; 1321af69d88dSmrg case 2: 132201e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2); 1323af69d88dSmrg radeon_emit(cs, sample_locs_2x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */ 1324af69d88dSmrg radeon_emit(cs, sample_locs_2x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */ 1325af69d88dSmrg max_dist = max_dist_2x; 1326af69d88dSmrg break; 1327af69d88dSmrg case 4: 132801e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2); 1329af69d88dSmrg radeon_emit(cs, sample_locs_4x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */ 1330af69d88dSmrg radeon_emit(cs, sample_locs_4x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */ 1331af69d88dSmrg max_dist = max_dist_4x; 1332af69d88dSmrg break; 1333af69d88dSmrg case 8: 133401e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2); 1335af69d88dSmrg radeon_emit(cs, sample_locs_8x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */ 1336af69d88dSmrg radeon_emit(cs, sample_locs_8x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */ 1337af69d88dSmrg max_dist = max_dist_8x; 1338af69d88dSmrg break; 1339af69d88dSmrg } 13403464ebd5Sriastradh } 13413464ebd5Sriastradh 1342af69d88dSmrg if (nr_samples > 1) { 134301e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2); 1344af69d88dSmrg radeon_emit(cs, S_028C00_LAST_PIXEL(1) | 1345af69d88dSmrg S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */ 1346af69d88dSmrg radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) | 1347af69d88dSmrg S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */ 1348af69d88dSmrg } else { 134901e04c3fSmrg radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2); 1350af69d88dSmrg radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */ 1351af69d88dSmrg radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */ 1352af69d88dSmrg } 13533464ebd5Sriastradh} 13543464ebd5Sriastradh 1355af69d88dSmrgstatic void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) 13563464ebd5Sriastradh{ 13577ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1358af69d88dSmrg struct pipe_framebuffer_state *state = &rctx->framebuffer.state; 1359af69d88dSmrg unsigned nr_cbufs = state->nr_cbufs; 1360af69d88dSmrg struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0]; 1361af69d88dSmrg unsigned i, sbu = 0; 1362af69d88dSmrg 1363af69d88dSmrg /* Colorbuffers. */ 136401e04c3fSmrg radeon_set_context_reg_seq(cs, R_0280A0_CB_COLOR0_INFO, 8); 1365af69d88dSmrg for (i = 0; i < nr_cbufs; i++) { 1366af69d88dSmrg radeon_emit(cs, cb[i] ? cb[i]->cb_color_info : 0); 1367af69d88dSmrg } 1368af69d88dSmrg /* set CB_COLOR1_INFO for possible dual-src blending */ 136901e04c3fSmrg if (rctx->framebuffer.dual_src_blend && i == 1 && cb[0]) { 1370af69d88dSmrg radeon_emit(cs, cb[0]->cb_color_info); 1371af69d88dSmrg i++; 1372af69d88dSmrg } 1373af69d88dSmrg for (; i < 8; i++) { 1374af69d88dSmrg radeon_emit(cs, 0); 1375af69d88dSmrg } 13763464ebd5Sriastradh 1377af69d88dSmrg if (nr_cbufs) { 1378af69d88dSmrg for (i = 0; i < nr_cbufs; i++) { 1379af69d88dSmrg unsigned reloc; 1380af69d88dSmrg 1381af69d88dSmrg if (!cb[i]) 1382af69d88dSmrg continue; 1383af69d88dSmrg 1384af69d88dSmrg /* COLOR_BASE */ 138501e04c3fSmrg radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base); 1386af69d88dSmrg 138701e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, 138801e04c3fSmrg &rctx->b.gfx, 1389af69d88dSmrg (struct r600_resource*)cb[i]->base.texture, 1390af69d88dSmrg RADEON_USAGE_READWRITE, 1391af69d88dSmrg cb[i]->base.texture->nr_samples > 1 ? 1392af69d88dSmrg RADEON_PRIO_COLOR_BUFFER_MSAA : 1393af69d88dSmrg RADEON_PRIO_COLOR_BUFFER); 1394af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 1395af69d88dSmrg radeon_emit(cs, reloc); 1396af69d88dSmrg 1397af69d88dSmrg /* FMASK */ 139801e04c3fSmrg radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask); 1399af69d88dSmrg 140001e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, 140101e04c3fSmrg &rctx->b.gfx, 1402af69d88dSmrg cb[i]->cb_buffer_fmask, 1403af69d88dSmrg RADEON_USAGE_READWRITE, 1404af69d88dSmrg cb[i]->base.texture->nr_samples > 1 ? 1405af69d88dSmrg RADEON_PRIO_COLOR_BUFFER_MSAA : 1406af69d88dSmrg RADEON_PRIO_COLOR_BUFFER); 1407af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 1408af69d88dSmrg radeon_emit(cs, reloc); 1409af69d88dSmrg 1410af69d88dSmrg /* CMASK */ 141101e04c3fSmrg radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask); 1412af69d88dSmrg 141301e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, 141401e04c3fSmrg &rctx->b.gfx, 1415af69d88dSmrg cb[i]->cb_buffer_cmask, 1416af69d88dSmrg RADEON_USAGE_READWRITE, 1417af69d88dSmrg cb[i]->base.texture->nr_samples > 1 ? 1418af69d88dSmrg RADEON_PRIO_COLOR_BUFFER_MSAA : 1419af69d88dSmrg RADEON_PRIO_COLOR_BUFFER); 1420af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 1421af69d88dSmrg radeon_emit(cs, reloc); 1422af69d88dSmrg } 14233464ebd5Sriastradh 142401e04c3fSmrg radeon_set_context_reg_seq(cs, R_028060_CB_COLOR0_SIZE, nr_cbufs); 1425af69d88dSmrg for (i = 0; i < nr_cbufs; i++) { 1426af69d88dSmrg radeon_emit(cs, cb[i] ? cb[i]->cb_color_size : 0); 1427af69d88dSmrg } 1428af69d88dSmrg 142901e04c3fSmrg radeon_set_context_reg_seq(cs, R_028080_CB_COLOR0_VIEW, nr_cbufs); 1430af69d88dSmrg for (i = 0; i < nr_cbufs; i++) { 1431af69d88dSmrg radeon_emit(cs, cb[i] ? cb[i]->cb_color_view : 0); 1432af69d88dSmrg } 1433af69d88dSmrg 143401e04c3fSmrg radeon_set_context_reg_seq(cs, R_028100_CB_COLOR0_MASK, nr_cbufs); 1435af69d88dSmrg for (i = 0; i < nr_cbufs; i++) { 1436af69d88dSmrg radeon_emit(cs, cb[i] ? cb[i]->cb_color_mask : 0); 1437af69d88dSmrg } 1438af69d88dSmrg 1439af69d88dSmrg sbu |= SURFACE_BASE_UPDATE_COLOR_NUM(nr_cbufs); 1440af69d88dSmrg } 1441af69d88dSmrg 1442af69d88dSmrg /* SURFACE_BASE_UPDATE */ 1443af69d88dSmrg if (rctx->b.family > CHIP_R600 && rctx->b.family < CHIP_RV770 && sbu) { 1444af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0)); 1445af69d88dSmrg radeon_emit(cs, sbu); 1446af69d88dSmrg sbu = 0; 1447af69d88dSmrg } 1448af69d88dSmrg 1449af69d88dSmrg /* Zbuffer. */ 1450af69d88dSmrg if (state->zsbuf) { 1451af69d88dSmrg struct r600_surface *surf = (struct r600_surface*)state->zsbuf; 145201e04c3fSmrg unsigned reloc = radeon_add_to_buffer_list(&rctx->b, 145301e04c3fSmrg &rctx->b.gfx, 1454af69d88dSmrg (struct r600_resource*)state->zsbuf->texture, 1455af69d88dSmrg RADEON_USAGE_READWRITE, 1456af69d88dSmrg surf->base.texture->nr_samples > 1 ? 1457af69d88dSmrg RADEON_PRIO_DEPTH_BUFFER_MSAA : 1458af69d88dSmrg RADEON_PRIO_DEPTH_BUFFER); 1459af69d88dSmrg 146001e04c3fSmrg radeon_set_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2); 1461af69d88dSmrg radeon_emit(cs, surf->db_depth_size); /* R_028000_DB_DEPTH_SIZE */ 1462af69d88dSmrg radeon_emit(cs, surf->db_depth_view); /* R_028004_DB_DEPTH_VIEW */ 146301e04c3fSmrg radeon_set_context_reg_seq(cs, R_02800C_DB_DEPTH_BASE, 2); 1464af69d88dSmrg radeon_emit(cs, surf->db_depth_base); /* R_02800C_DB_DEPTH_BASE */ 1465af69d88dSmrg radeon_emit(cs, surf->db_depth_info); /* R_028010_DB_DEPTH_INFO */ 1466af69d88dSmrg 1467af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 1468af69d88dSmrg radeon_emit(cs, reloc); 1469af69d88dSmrg 147001e04c3fSmrg radeon_set_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit); 1471af69d88dSmrg 1472af69d88dSmrg sbu |= SURFACE_BASE_UPDATE_DEPTH; 1473af69d88dSmrg } else if (rctx->screen->b.info.drm_minor >= 18) { 1474af69d88dSmrg /* DRM 2.6.18 allows the INVALID format to disable depth/stencil. 1475af69d88dSmrg * Older kernels are out of luck. */ 147601e04c3fSmrg radeon_set_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID)); 1477af69d88dSmrg } 1478af69d88dSmrg 1479af69d88dSmrg /* SURFACE_BASE_UPDATE */ 1480af69d88dSmrg if (rctx->b.family > CHIP_R600 && rctx->b.family < CHIP_RV770 && sbu) { 1481af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0)); 1482af69d88dSmrg radeon_emit(cs, sbu); 1483af69d88dSmrg sbu = 0; 1484af69d88dSmrg } 1485af69d88dSmrg 1486af69d88dSmrg /* Framebuffer dimensions. */ 148701e04c3fSmrg radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2); 1488af69d88dSmrg radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) | 1489af69d88dSmrg S_028240_WINDOW_OFFSET_DISABLE(1)); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */ 1490af69d88dSmrg radeon_emit(cs, S_028244_BR_X(state->width) | 1491af69d88dSmrg S_028244_BR_Y(state->height)); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */ 1492af69d88dSmrg 1493af69d88dSmrg if (rctx->framebuffer.is_msaa_resolve) { 149401e04c3fSmrg radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1); 1495af69d88dSmrg } else { 1496af69d88dSmrg /* Always enable the first colorbuffer in CB_SHADER_CONTROL. This 1497af69d88dSmrg * will assure that the alpha-test will work even if there is 1498af69d88dSmrg * no colorbuffer bound. */ 149901e04c3fSmrg radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1500af69d88dSmrg (1ull << MAX2(nr_cbufs, 1)) - 1); 1501af69d88dSmrg } 1502af69d88dSmrg 1503af69d88dSmrg r600_emit_msaa_state(rctx, rctx->framebuffer.nr_samples); 15043464ebd5Sriastradh} 15053464ebd5Sriastradh 150601e04c3fSmrgstatic void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 150701e04c3fSmrg{ 150801e04c3fSmrg struct r600_context *rctx = (struct r600_context *)ctx; 150901e04c3fSmrg 151001e04c3fSmrg if (rctx->ps_iter_samples == min_samples) 151101e04c3fSmrg return; 151201e04c3fSmrg 151301e04c3fSmrg rctx->ps_iter_samples = min_samples; 151401e04c3fSmrg if (rctx->framebuffer.nr_samples > 1) { 151501e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->rasterizer_state.atom); 151601e04c3fSmrg if (rctx->b.chip_class == R600) 151701e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 151801e04c3fSmrg } 151901e04c3fSmrg} 152001e04c3fSmrg 1521af69d88dSmrgstatic void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) 15223464ebd5Sriastradh{ 15237ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1524af69d88dSmrg struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; 1525af69d88dSmrg 1526af69d88dSmrg if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) { 152701e04c3fSmrg radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); 1528af69d88dSmrg if (rctx->b.chip_class == R600) { 1529af69d88dSmrg radeon_emit(cs, 0xff); /* R_028238_CB_TARGET_MASK */ 1530af69d88dSmrg radeon_emit(cs, 0xff); /* R_02823C_CB_SHADER_MASK */ 1531af69d88dSmrg } else { 1532af69d88dSmrg radeon_emit(cs, 0xf); /* R_028238_CB_TARGET_MASK */ 1533af69d88dSmrg radeon_emit(cs, 0xf); /* R_02823C_CB_SHADER_MASK */ 1534af69d88dSmrg } 153501e04c3fSmrg radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, a->cb_color_control); 1536af69d88dSmrg } else { 153701e04c3fSmrg unsigned fb_colormask = a->bound_cbufs_target_mask; 153801e04c3fSmrg unsigned ps_colormask = a->ps_color_export_mask; 1539af69d88dSmrg unsigned multiwrite = a->multiwrite && a->nr_cbufs > 1; 1540af69d88dSmrg 154101e04c3fSmrg radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); 1542af69d88dSmrg radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */ 1543af69d88dSmrg /* Always enable the first color output to make sure alpha-test works even without one. */ 1544af69d88dSmrg radeon_emit(cs, 0xf | (multiwrite ? fb_colormask : ps_colormask)); /* R_02823C_CB_SHADER_MASK */ 154501e04c3fSmrg radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, 1546af69d88dSmrg a->cb_color_control | 1547af69d88dSmrg S_028808_MULTIWRITE_ENABLE(multiwrite)); 1548af69d88dSmrg } 15493464ebd5Sriastradh} 15503464ebd5Sriastradh 1551af69d88dSmrgstatic void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) 15523464ebd5Sriastradh{ 15537ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1554af69d88dSmrg struct r600_db_state *a = (struct r600_db_state*)atom; 1555af69d88dSmrg 1556af69d88dSmrg if (a->rsurf && a->rsurf->db_htile_surface) { 1557af69d88dSmrg struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture; 1558af69d88dSmrg unsigned reloc_idx; 1559af69d88dSmrg 156001e04c3fSmrg radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); 156101e04c3fSmrg radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); 156201e04c3fSmrg radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); 156301e04c3fSmrg reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource, 156401e04c3fSmrg RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META); 156501e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 156601e04c3fSmrg radeon_emit(cs, reloc_idx); 1567af69d88dSmrg } else { 156801e04c3fSmrg radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0); 1569af69d88dSmrg } 15703464ebd5Sriastradh} 15713464ebd5Sriastradh 1572af69d88dSmrgstatic void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) 15733464ebd5Sriastradh{ 15747ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1575af69d88dSmrg struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; 1576af69d88dSmrg unsigned db_render_control = 0; 1577af69d88dSmrg unsigned db_render_override = 1578af69d88dSmrg S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | 1579af69d88dSmrg S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); 15803464ebd5Sriastradh 158101e04c3fSmrg if (rctx->b.chip_class >= R700) { 158201e04c3fSmrg switch (a->ps_conservative_z) { 158301e04c3fSmrg default: /* fall through */ 158401e04c3fSmrg case TGSI_FS_DEPTH_LAYOUT_ANY: 158501e04c3fSmrg db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_ANY_Z); 158601e04c3fSmrg break; 158701e04c3fSmrg case TGSI_FS_DEPTH_LAYOUT_GREATER: 158801e04c3fSmrg db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_GREATER_THAN_Z); 158901e04c3fSmrg break; 159001e04c3fSmrg case TGSI_FS_DEPTH_LAYOUT_LESS: 159101e04c3fSmrg db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_LESS_THAN_Z); 159201e04c3fSmrg break; 159301e04c3fSmrg } 159401e04c3fSmrg } 159501e04c3fSmrg 159601e04c3fSmrg if (rctx->b.num_occlusion_queries > 0 && 159701e04c3fSmrg !a->occlusion_queries_disabled) { 1598af69d88dSmrg if (rctx->b.chip_class >= R700) { 1599af69d88dSmrg db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); 1600af69d88dSmrg } 1601af69d88dSmrg db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); 160201e04c3fSmrg } else { 160301e04c3fSmrg db_render_control |= S_028D0C_ZPASS_INCREMENT_DISABLE(1); 1604af69d88dSmrg } 160501e04c3fSmrg 1606af69d88dSmrg if (rctx->db_state.rsurf && rctx->db_state.rsurf->db_htile_surface) { 1607af69d88dSmrg /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */ 1608af69d88dSmrg db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF); 1609af69d88dSmrg /* This is to fix a lockup when hyperz and alpha test are enabled at 1610af69d88dSmrg * the same time somehow GPU get confuse on which order to pick for 1611af69d88dSmrg * z test 1612af69d88dSmrg */ 1613af69d88dSmrg if (rctx->alphatest_state.sx_alpha_test_control) { 1614af69d88dSmrg db_render_override |= S_028D10_FORCE_SHADER_Z_ORDER(1); 1615af69d88dSmrg } 1616af69d88dSmrg } else { 1617af69d88dSmrg db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); 1618af69d88dSmrg } 161901e04c3fSmrg if (rctx->b.chip_class == R600 && rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) { 162001e04c3fSmrg /* sample shading and hyperz causes lockups on R6xx chips */ 162101e04c3fSmrg db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); 162201e04c3fSmrg } 1623af69d88dSmrg if (a->flush_depthstencil_through_cb) { 1624af69d88dSmrg assert(a->copy_depth || a->copy_stencil); 1625af69d88dSmrg 1626af69d88dSmrg db_render_control |= S_028D0C_DEPTH_COPY_ENABLE(a->copy_depth) | 1627af69d88dSmrg S_028D0C_STENCIL_COPY_ENABLE(a->copy_stencil) | 1628af69d88dSmrg S_028D0C_COPY_CENTROID(1) | 1629af69d88dSmrg S_028D0C_COPY_SAMPLE(a->copy_sample); 163001e04c3fSmrg 163101e04c3fSmrg if (rctx->b.chip_class == R600) 163201e04c3fSmrg db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); 163301e04c3fSmrg 163401e04c3fSmrg if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 || 163501e04c3fSmrg rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RV635) 163601e04c3fSmrg db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); 163701e04c3fSmrg } else if (a->flush_depth_inplace || a->flush_stencil_inplace) { 163801e04c3fSmrg db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) | 163901e04c3fSmrg S_028D0C_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace); 1640af69d88dSmrg db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); 1641af69d88dSmrg } 1642af69d88dSmrg if (a->htile_clear) { 1643af69d88dSmrg db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1); 1644af69d88dSmrg } 16453464ebd5Sriastradh 1646af69d88dSmrg /* RV770 workaround for a hang with 8x MSAA. */ 1647af69d88dSmrg if (rctx->b.family == CHIP_RV770 && a->log_samples == 3) { 1648af69d88dSmrg db_render_override |= S_028D10_MAX_TILES_IN_DTT(6); 1649af69d88dSmrg } 1650af69d88dSmrg 165101e04c3fSmrg radeon_set_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2); 1652af69d88dSmrg radeon_emit(cs, db_render_control); /* R_028D0C_DB_RENDER_CONTROL */ 1653af69d88dSmrg radeon_emit(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */ 165401e04c3fSmrg radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control); 1655af69d88dSmrg} 16563464ebd5Sriastradh 1657af69d88dSmrgstatic void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) 1658af69d88dSmrg{ 16597ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1660af69d88dSmrg struct r600_config_state *a = (struct r600_config_state*)atom; 1661af69d88dSmrg 166201e04c3fSmrg radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1); 166301e04c3fSmrg radeon_set_config_reg(cs, R_008C08_SQ_GPR_RESOURCE_MGMT_2, a->sq_gpr_resource_mgmt_2); 1664af69d88dSmrg} 16653464ebd5Sriastradh 1666af69d88dSmrgstatic void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) 1667af69d88dSmrg{ 16687ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1669af69d88dSmrg uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask; 1670af69d88dSmrg 1671af69d88dSmrg while (dirty_mask) { 1672af69d88dSmrg struct pipe_vertex_buffer *vb; 1673af69d88dSmrg struct r600_resource *rbuffer; 1674af69d88dSmrg unsigned offset; 1675af69d88dSmrg unsigned buffer_index = u_bit_scan(&dirty_mask); 1676af69d88dSmrg 1677af69d88dSmrg vb = &rctx->vertex_buffer_state.vb[buffer_index]; 167801e04c3fSmrg rbuffer = (struct r600_resource*)vb->buffer.resource; 1679af69d88dSmrg assert(rbuffer); 1680af69d88dSmrg 1681af69d88dSmrg offset = vb->buffer_offset; 1682af69d88dSmrg 168301e04c3fSmrg /* fetch resources start at index 320 (OFFSET_FS) */ 1684af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); 168501e04c3fSmrg radeon_emit(cs, (R600_FETCH_CONSTANTS_OFFSET_FS + buffer_index) * 7); 1686af69d88dSmrg radeon_emit(cs, offset); /* RESOURCEi_WORD0 */ 168701e04c3fSmrg radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */ 1688af69d88dSmrg radeon_emit(cs, /* RESOURCEi_WORD2 */ 1689af69d88dSmrg S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | 1690af69d88dSmrg S_038008_STRIDE(vb->stride)); 1691af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD3 */ 1692af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD4 */ 1693af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD5 */ 1694af69d88dSmrg radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */ 1695af69d88dSmrg 1696af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 169701e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 169801e04c3fSmrg RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); 1699af69d88dSmrg } 17003464ebd5Sriastradh} 17013464ebd5Sriastradh 1702af69d88dSmrgstatic void r600_emit_constant_buffers(struct r600_context *rctx, 1703af69d88dSmrg struct r600_constbuf_state *state, 1704af69d88dSmrg unsigned buffer_id_base, 1705af69d88dSmrg unsigned reg_alu_constbuf_size, 1706af69d88dSmrg unsigned reg_alu_const_cache) 17073464ebd5Sriastradh{ 17087ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1709af69d88dSmrg uint32_t dirty_mask = state->dirty_mask; 1710af69d88dSmrg 1711af69d88dSmrg while (dirty_mask) { 1712af69d88dSmrg struct pipe_constant_buffer *cb; 1713af69d88dSmrg struct r600_resource *rbuffer; 1714af69d88dSmrg unsigned offset; 1715af69d88dSmrg unsigned buffer_index = ffs(dirty_mask) - 1; 1716af69d88dSmrg unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER); 1717af69d88dSmrg cb = &state->cb[buffer_index]; 1718af69d88dSmrg rbuffer = (struct r600_resource*)cb->buffer; 1719af69d88dSmrg assert(rbuffer); 1720af69d88dSmrg 1721af69d88dSmrg offset = cb->buffer_offset; 1722af69d88dSmrg 1723af69d88dSmrg if (!gs_ring_buffer) { 172401e04c3fSmrg assert(buffer_index < R600_MAX_HW_CONST_BUFFERS); 172501e04c3fSmrg radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4, 172601e04c3fSmrg DIV_ROUND_UP(cb->buffer_size, 256)); 172701e04c3fSmrg radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8); 172801e04c3fSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 172901e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 173001e04c3fSmrg RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); 1731af69d88dSmrg } 17323464ebd5Sriastradh 1733af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); 1734af69d88dSmrg radeon_emit(cs, (buffer_id_base + buffer_index) * 7); 1735af69d88dSmrg radeon_emit(cs, offset); /* RESOURCEi_WORD0 */ 173601e04c3fSmrg radeon_emit(cs, cb->buffer_size - 1); /* RESOURCEi_WORD1 */ 1737af69d88dSmrg radeon_emit(cs, /* RESOURCEi_WORD2 */ 1738af69d88dSmrg S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) | 1739af69d88dSmrg S_038008_STRIDE(gs_ring_buffer ? 4 : 16)); 1740af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD3 */ 1741af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD4 */ 1742af69d88dSmrg radeon_emit(cs, 0); /* RESOURCEi_WORD5 */ 1743af69d88dSmrg radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */ 1744af69d88dSmrg 1745af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 174601e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 174701e04c3fSmrg RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); 1748af69d88dSmrg 1749af69d88dSmrg dirty_mask &= ~(1 << buffer_index); 1750af69d88dSmrg } 1751af69d88dSmrg state->dirty_mask = 0; 1752af69d88dSmrg} 17533464ebd5Sriastradh 1754af69d88dSmrgstatic void r600_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 1755af69d88dSmrg{ 175601e04c3fSmrg r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 175701e04c3fSmrg R600_FETCH_CONSTANTS_OFFSET_VS, 1758af69d88dSmrg R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 1759af69d88dSmrg R_028980_ALU_CONST_CACHE_VS_0); 1760af69d88dSmrg} 17613464ebd5Sriastradh 1762af69d88dSmrgstatic void r600_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 1763af69d88dSmrg{ 176401e04c3fSmrg r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 176501e04c3fSmrg R600_FETCH_CONSTANTS_OFFSET_GS, 1766af69d88dSmrg R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 1767af69d88dSmrg R_0289C0_ALU_CONST_CACHE_GS_0); 17683464ebd5Sriastradh} 17693464ebd5Sriastradh 1770af69d88dSmrgstatic void r600_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom) 17713464ebd5Sriastradh{ 177201e04c3fSmrg r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 177301e04c3fSmrg R600_FETCH_CONSTANTS_OFFSET_PS, 1774af69d88dSmrg R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 1775af69d88dSmrg R_028940_ALU_CONST_CACHE_PS_0); 1776af69d88dSmrg} 17773464ebd5Sriastradh 1778af69d88dSmrgstatic void r600_emit_sampler_views(struct r600_context *rctx, 1779af69d88dSmrg struct r600_samplerview_state *state, 1780af69d88dSmrg unsigned resource_id_base) 1781af69d88dSmrg{ 17827ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1783af69d88dSmrg uint32_t dirty_mask = state->dirty_mask; 1784af69d88dSmrg 1785af69d88dSmrg while (dirty_mask) { 1786af69d88dSmrg struct r600_pipe_sampler_view *rview; 1787af69d88dSmrg unsigned resource_index = u_bit_scan(&dirty_mask); 1788af69d88dSmrg unsigned reloc; 1789af69d88dSmrg 1790af69d88dSmrg rview = state->views[resource_index]; 1791af69d88dSmrg assert(rview); 1792af69d88dSmrg 1793af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); 1794af69d88dSmrg radeon_emit(cs, (resource_id_base + resource_index) * 7); 1795af69d88dSmrg radeon_emit_array(cs, rview->tex_resource_words, 7); 1796af69d88dSmrg 179701e04c3fSmrg reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource, 1798af69d88dSmrg RADEON_USAGE_READ, 179901e04c3fSmrg r600_get_sampler_view_priority(rview->tex_resource)); 1800af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 1801af69d88dSmrg radeon_emit(cs, reloc); 1802af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 1803af69d88dSmrg radeon_emit(cs, reloc); 1804af69d88dSmrg } 1805af69d88dSmrg state->dirty_mask = 0; 1806af69d88dSmrg} 1807af69d88dSmrg 1808af69d88dSmrg 1809af69d88dSmrgstatic void r600_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 1810af69d88dSmrg{ 181101e04c3fSmrg r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, R600_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS); 1812af69d88dSmrg} 1813af69d88dSmrg 1814af69d88dSmrgstatic void r600_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 1815af69d88dSmrg{ 181601e04c3fSmrg r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, R600_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS); 1817af69d88dSmrg} 18183464ebd5Sriastradh 1819af69d88dSmrgstatic void r600_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom) 1820af69d88dSmrg{ 182101e04c3fSmrg r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS); 1822af69d88dSmrg} 18233464ebd5Sriastradh 1824af69d88dSmrgstatic void r600_emit_sampler_states(struct r600_context *rctx, 1825af69d88dSmrg struct r600_textures_info *texinfo, 1826af69d88dSmrg unsigned resource_id_base, 1827af69d88dSmrg unsigned border_color_reg) 1828af69d88dSmrg{ 18297ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1830af69d88dSmrg uint32_t dirty_mask = texinfo->states.dirty_mask; 18313464ebd5Sriastradh 1832af69d88dSmrg while (dirty_mask) { 1833af69d88dSmrg struct r600_pipe_sampler_state *rstate; 1834af69d88dSmrg struct r600_pipe_sampler_view *rview; 1835af69d88dSmrg unsigned i = u_bit_scan(&dirty_mask); 18363464ebd5Sriastradh 1837af69d88dSmrg rstate = texinfo->states.states[i]; 1838af69d88dSmrg assert(rstate); 1839af69d88dSmrg rview = texinfo->views.views[i]; 18403464ebd5Sriastradh 1841af69d88dSmrg /* TEX_ARRAY_OVERRIDE must be set for array textures to disable 1842af69d88dSmrg * filtering between layers. 1843af69d88dSmrg */ 18449f464c52Smaya enum pipe_texture_target target = PIPE_BUFFER; 18459f464c52Smaya if (rview) 18469f464c52Smaya target = rview->base.texture->target; 18479f464c52Smaya if (target == PIPE_TEXTURE_1D_ARRAY || 18489f464c52Smaya target == PIPE_TEXTURE_2D_ARRAY) { 18499f464c52Smaya rstate->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1); 18509f464c52Smaya texinfo->is_array_sampler[i] = true; 18519f464c52Smaya } else { 18529f464c52Smaya rstate->tex_sampler_words[0] &= C_03C000_TEX_ARRAY_OVERRIDE; 18539f464c52Smaya texinfo->is_array_sampler[i] = false; 18543464ebd5Sriastradh } 18553464ebd5Sriastradh 1856af69d88dSmrg radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0)); 1857af69d88dSmrg radeon_emit(cs, (resource_id_base + i) * 3); 1858af69d88dSmrg radeon_emit_array(cs, rstate->tex_sampler_words, 3); 18593464ebd5Sriastradh 1860af69d88dSmrg if (rstate->border_color_use) { 1861af69d88dSmrg unsigned offset; 18623464ebd5Sriastradh 1863af69d88dSmrg offset = border_color_reg; 1864af69d88dSmrg offset += i * 16; 186501e04c3fSmrg radeon_set_config_reg_seq(cs, offset, 4); 1866af69d88dSmrg radeon_emit_array(cs, rstate->border_color.ui, 4); 1867af69d88dSmrg } 18683464ebd5Sriastradh } 1869af69d88dSmrg texinfo->states.dirty_mask = 0; 1870af69d88dSmrg} 18713464ebd5Sriastradh 1872af69d88dSmrgstatic void r600_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 1873af69d88dSmrg{ 1874af69d88dSmrg r600_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, R_00A600_TD_VS_SAMPLER0_BORDER_RED); 1875af69d88dSmrg} 18763464ebd5Sriastradh 1877af69d88dSmrgstatic void r600_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 1878af69d88dSmrg{ 1879af69d88dSmrg r600_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, R_00A800_TD_GS_SAMPLER0_BORDER_RED); 1880af69d88dSmrg} 18813464ebd5Sriastradh 1882af69d88dSmrgstatic void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom) 1883af69d88dSmrg{ 1884af69d88dSmrg r600_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, R_00A400_TD_PS_SAMPLER0_BORDER_RED); 1885af69d88dSmrg} 18863464ebd5Sriastradh 1887af69d88dSmrgstatic void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom) 1888af69d88dSmrg{ 18897ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1890af69d88dSmrg unsigned tmp; 18913464ebd5Sriastradh 1892af69d88dSmrg tmp = S_009508_DISABLE_CUBE_ANISO(1) | 1893af69d88dSmrg S_009508_SYNC_GRADIENT(1) | 1894af69d88dSmrg S_009508_SYNC_WALKER(1) | 1895af69d88dSmrg S_009508_SYNC_ALIGNER(1); 1896af69d88dSmrg if (!rctx->seamless_cube_map.enabled) { 1897af69d88dSmrg tmp |= S_009508_DISABLE_CUBE_WRAP(1); 1898af69d88dSmrg } 189901e04c3fSmrg radeon_set_config_reg(cs, R_009508_TA_CNTL_AUX, tmp); 1900af69d88dSmrg} 19013464ebd5Sriastradh 1902af69d88dSmrgstatic void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) 1903af69d88dSmrg{ 1904af69d88dSmrg struct r600_sample_mask *s = (struct r600_sample_mask*)a; 1905af69d88dSmrg uint8_t mask = s->sample_mask; 19063464ebd5Sriastradh 19077ec681f3Smrg radeon_set_context_reg(&rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, 1908af69d88dSmrg mask | (mask << 8) | (mask << 16) | (mask << 24)); 1909af69d88dSmrg} 19103464ebd5Sriastradh 1911af69d88dSmrgstatic void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) 1912af69d88dSmrg{ 19137ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1914af69d88dSmrg struct r600_cso_state *state = (struct r600_cso_state*)a; 1915af69d88dSmrg struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; 1916af69d88dSmrg 191701e04c3fSmrg if (!shader) 191801e04c3fSmrg return; 191901e04c3fSmrg 192001e04c3fSmrg radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8); 1921af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 192201e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer, 192301e04c3fSmrg RADEON_USAGE_READ, 192401e04c3fSmrg RADEON_PRIO_SHADER_BINARY)); 19253464ebd5Sriastradh} 19263464ebd5Sriastradh 1927af69d88dSmrgstatic void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) 19283464ebd5Sriastradh{ 19297ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1930af69d88dSmrg struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; 19313464ebd5Sriastradh 1932af69d88dSmrg uint32_t v2 = 0, primid = 0; 19333464ebd5Sriastradh 193401e04c3fSmrg if (rctx->vs_shader->current->shader.vs_as_gs_a) { 193501e04c3fSmrg v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_A); 193601e04c3fSmrg primid = 1; 193701e04c3fSmrg } 193801e04c3fSmrg 1939af69d88dSmrg if (state->geom_enable) { 1940af69d88dSmrg uint32_t cut_val; 19413464ebd5Sriastradh 194201e04c3fSmrg if (rctx->gs_shader->gs_max_out_vertices <= 128) 1943af69d88dSmrg cut_val = V_028A40_GS_CUT_128; 194401e04c3fSmrg else if (rctx->gs_shader->gs_max_out_vertices <= 256) 1945af69d88dSmrg cut_val = V_028A40_GS_CUT_256; 194601e04c3fSmrg else if (rctx->gs_shader->gs_max_out_vertices <= 512) 1947af69d88dSmrg cut_val = V_028A40_GS_CUT_512; 1948af69d88dSmrg else 1949af69d88dSmrg cut_val = V_028A40_GS_CUT_1024; 19503464ebd5Sriastradh 1951af69d88dSmrg v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) | 1952af69d88dSmrg S_028A40_CUT_MODE(cut_val); 19533464ebd5Sriastradh 1954af69d88dSmrg if (rctx->gs_shader->current->shader.gs_prim_id_input) 1955af69d88dSmrg primid = 1; 19563464ebd5Sriastradh } 19573464ebd5Sriastradh 195801e04c3fSmrg radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2); 195901e04c3fSmrg radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid); 1960af69d88dSmrg} 19613464ebd5Sriastradh 1962af69d88dSmrgstatic void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) 1963af69d88dSmrg{ 19647ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; 1965af69d88dSmrg struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; 1966af69d88dSmrg struct r600_resource *rbuffer; 19673464ebd5Sriastradh 196801e04c3fSmrg radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); 1969af69d88dSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1970af69d88dSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); 1971af69d88dSmrg 1972af69d88dSmrg if (state->enable) { 1973af69d88dSmrg rbuffer =(struct r600_resource*)state->esgs_ring.buffer; 197401e04c3fSmrg radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0); 1975af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 197601e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 1977af69d88dSmrg RADEON_USAGE_READWRITE, 197801e04c3fSmrg RADEON_PRIO_SHADER_RINGS)); 197901e04c3fSmrg radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 1980af69d88dSmrg state->esgs_ring.buffer_size >> 8); 1981af69d88dSmrg 1982af69d88dSmrg rbuffer =(struct r600_resource*)state->gsvs_ring.buffer; 198301e04c3fSmrg radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0); 1984af69d88dSmrg radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 198501e04c3fSmrg radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, 1986af69d88dSmrg RADEON_USAGE_READWRITE, 198701e04c3fSmrg RADEON_PRIO_SHADER_RINGS)); 198801e04c3fSmrg radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 1989af69d88dSmrg state->gsvs_ring.buffer_size >> 8); 1990af69d88dSmrg } else { 199101e04c3fSmrg radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0); 199201e04c3fSmrg radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0); 1993af69d88dSmrg } 19943464ebd5Sriastradh 199501e04c3fSmrg radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); 1996af69d88dSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1997af69d88dSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); 1998af69d88dSmrg} 19993464ebd5Sriastradh 2000af69d88dSmrg/* Adjust GPR allocation on R6xx/R7xx */ 2001af69d88dSmrgbool r600_adjust_gprs(struct r600_context *rctx) 2002af69d88dSmrg{ 200301e04c3fSmrg unsigned num_gprs[R600_NUM_HW_STAGES]; 200401e04c3fSmrg unsigned new_gprs[R600_NUM_HW_STAGES]; 200501e04c3fSmrg unsigned cur_gprs[R600_NUM_HW_STAGES]; 200601e04c3fSmrg unsigned def_gprs[R600_NUM_HW_STAGES]; 2007af69d88dSmrg unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; 200801e04c3fSmrg unsigned max_gprs; 2009af69d88dSmrg unsigned tmp, tmp2; 201001e04c3fSmrg unsigned i; 201101e04c3fSmrg bool need_recalc = false, use_default = true; 2012af69d88dSmrg 201301e04c3fSmrg /* hardware will reserve twice num_clause_temp_gprs */ 201401e04c3fSmrg max_gprs = def_num_clause_temp_gprs * 2; 201501e04c3fSmrg for (i = 0; i < R600_NUM_HW_STAGES; i++) { 201601e04c3fSmrg def_gprs[i] = rctx->default_gprs[i]; 201701e04c3fSmrg max_gprs += def_gprs[i]; 201801e04c3fSmrg } 201901e04c3fSmrg 202001e04c3fSmrg cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); 202101e04c3fSmrg cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); 202201e04c3fSmrg cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); 202301e04c3fSmrg cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); 202401e04c3fSmrg 202501e04c3fSmrg num_gprs[R600_HW_STAGE_PS] = rctx->ps_shader->current->shader.bc.ngpr; 2026af69d88dSmrg if (rctx->gs_shader) { 202701e04c3fSmrg num_gprs[R600_HW_STAGE_ES] = rctx->vs_shader->current->shader.bc.ngpr; 202801e04c3fSmrg num_gprs[R600_HW_STAGE_GS] = rctx->gs_shader->current->shader.bc.ngpr; 202901e04c3fSmrg num_gprs[R600_HW_STAGE_VS] = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr; 2030af69d88dSmrg } else { 203101e04c3fSmrg num_gprs[R600_HW_STAGE_ES] = 0; 203201e04c3fSmrg num_gprs[R600_HW_STAGE_GS] = 0; 203301e04c3fSmrg num_gprs[R600_HW_STAGE_VS] = rctx->vs_shader->current->shader.bc.ngpr; 203401e04c3fSmrg } 203501e04c3fSmrg 203601e04c3fSmrg for (i = 0; i < R600_NUM_HW_STAGES; i++) { 203701e04c3fSmrg new_gprs[i] = num_gprs[i]; 203801e04c3fSmrg if (new_gprs[i] > cur_gprs[i]) 203901e04c3fSmrg need_recalc = true; 204001e04c3fSmrg if (new_gprs[i] > def_gprs[i]) 204101e04c3fSmrg use_default = false; 20423464ebd5Sriastradh } 2043af69d88dSmrg 2044af69d88dSmrg /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */ 204501e04c3fSmrg if (!need_recalc) 2046af69d88dSmrg return true; 204701e04c3fSmrg 204801e04c3fSmrg /* try to use switch back to default */ 204901e04c3fSmrg if (!use_default) { 205001e04c3fSmrg /* always privilege vs stage so that at worst we have the 205101e04c3fSmrg * pixel stage producing wrong output (not the vertex 205201e04c3fSmrg * stage) */ 205301e04c3fSmrg new_gprs[R600_HW_STAGE_PS] = max_gprs - def_num_clause_temp_gprs * 2; 205401e04c3fSmrg for (i = R600_HW_STAGE_VS; i < R600_NUM_HW_STAGES; i++) 205501e04c3fSmrg new_gprs[R600_HW_STAGE_PS] -= new_gprs[i]; 205601e04c3fSmrg } else { 205701e04c3fSmrg for (i = 0; i < R600_NUM_HW_STAGES; i++) 205801e04c3fSmrg new_gprs[i] = def_gprs[i]; 20593464ebd5Sriastradh } 20603464ebd5Sriastradh 2061af69d88dSmrg /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= 2062af69d88dSmrg * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup 2063af69d88dSmrg * Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS 2064af69d88dSmrg * it will lockup. So in this case just discard the draw command 2065af69d88dSmrg * and don't change the current gprs repartitions. 2066af69d88dSmrg */ 206701e04c3fSmrg for (i = 0; i < R600_NUM_HW_STAGES; i++) { 206801e04c3fSmrg if (num_gprs[i] > new_gprs[i]) { 206901e04c3fSmrg R600_ERR("shaders require too many register (%d + %d + %d + %d) " 207001e04c3fSmrg "for a combined maximum of %d\n", 207101e04c3fSmrg num_gprs[R600_HW_STAGE_PS], num_gprs[R600_HW_STAGE_VS], num_gprs[R600_HW_STAGE_ES], num_gprs[R600_HW_STAGE_GS], max_gprs); 207201e04c3fSmrg return false; 207301e04c3fSmrg } 2074af69d88dSmrg } 20753464ebd5Sriastradh 2076af69d88dSmrg /* in some case we endup recomputing the current value */ 207701e04c3fSmrg tmp = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) | 207801e04c3fSmrg S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) | 2079af69d88dSmrg S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); 2080af69d88dSmrg 208101e04c3fSmrg tmp2 = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) | 208201e04c3fSmrg S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]); 2083af69d88dSmrg if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) { 2084af69d88dSmrg rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; 2085af69d88dSmrg rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2; 208601e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->config_state.atom); 2087af69d88dSmrg rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; 2088af69d88dSmrg } 2089af69d88dSmrg return true; 20903464ebd5Sriastradh} 20913464ebd5Sriastradh 2092af69d88dSmrgvoid r600_init_atom_start_cs(struct r600_context *rctx) 20933464ebd5Sriastradh{ 20943464ebd5Sriastradh int ps_prio; 20953464ebd5Sriastradh int vs_prio; 20963464ebd5Sriastradh int gs_prio; 20973464ebd5Sriastradh int es_prio; 20983464ebd5Sriastradh int num_ps_gprs; 20993464ebd5Sriastradh int num_vs_gprs; 21003464ebd5Sriastradh int num_gs_gprs; 21013464ebd5Sriastradh int num_es_gprs; 21023464ebd5Sriastradh int num_temp_gprs; 21033464ebd5Sriastradh int num_ps_threads; 21043464ebd5Sriastradh int num_vs_threads; 21053464ebd5Sriastradh int num_gs_threads; 21063464ebd5Sriastradh int num_es_threads; 21073464ebd5Sriastradh int num_ps_stack_entries; 21083464ebd5Sriastradh int num_vs_stack_entries; 21093464ebd5Sriastradh int num_gs_stack_entries; 21103464ebd5Sriastradh int num_es_stack_entries; 21113464ebd5Sriastradh enum radeon_family family; 2112af69d88dSmrg struct r600_command_buffer *cb = &rctx->start_cs_cmd; 2113af69d88dSmrg uint32_t tmp, i; 2114af69d88dSmrg 2115af69d88dSmrg r600_init_command_buffer(cb, 256); 21163464ebd5Sriastradh 2117af69d88dSmrg /* R6xx requires this packet at the start of each command buffer */ 2118af69d88dSmrg if (rctx->b.chip_class == R600) { 2119af69d88dSmrg r600_store_value(cb, PKT3(PKT3_START_3D_CMDBUF, 0, 0)); 2120af69d88dSmrg r600_store_value(cb, 0); 2121af69d88dSmrg } 2122af69d88dSmrg /* All asics require this one */ 2123af69d88dSmrg r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 2124af69d88dSmrg r600_store_value(cb, 0x80000000); 2125af69d88dSmrg r600_store_value(cb, 0x80000000); 2126af69d88dSmrg 2127af69d88dSmrg /* We're setting config registers here. */ 2128af69d88dSmrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 2129af69d88dSmrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 2130af69d88dSmrg 213101e04c3fSmrg /* This enables pipeline stat & streamout queries. 213201e04c3fSmrg * They are only disabled by blits. 213301e04c3fSmrg */ 213401e04c3fSmrg r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); 213501e04c3fSmrg r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0)); 213601e04c3fSmrg 2137af69d88dSmrg family = rctx->b.family; 21383464ebd5Sriastradh ps_prio = 0; 21393464ebd5Sriastradh vs_prio = 1; 21403464ebd5Sriastradh gs_prio = 2; 21413464ebd5Sriastradh es_prio = 3; 21423464ebd5Sriastradh switch (family) { 21433464ebd5Sriastradh case CHIP_R600: 21443464ebd5Sriastradh num_ps_gprs = 192; 21453464ebd5Sriastradh num_vs_gprs = 56; 21463464ebd5Sriastradh num_temp_gprs = 4; 21473464ebd5Sriastradh num_gs_gprs = 0; 21483464ebd5Sriastradh num_es_gprs = 0; 21493464ebd5Sriastradh num_ps_threads = 136; 21503464ebd5Sriastradh num_vs_threads = 48; 21513464ebd5Sriastradh num_gs_threads = 4; 21523464ebd5Sriastradh num_es_threads = 4; 21533464ebd5Sriastradh num_ps_stack_entries = 128; 21543464ebd5Sriastradh num_vs_stack_entries = 128; 21553464ebd5Sriastradh num_gs_stack_entries = 0; 21563464ebd5Sriastradh num_es_stack_entries = 0; 21573464ebd5Sriastradh break; 21583464ebd5Sriastradh case CHIP_RV630: 21593464ebd5Sriastradh case CHIP_RV635: 21603464ebd5Sriastradh num_ps_gprs = 84; 21613464ebd5Sriastradh num_vs_gprs = 36; 21623464ebd5Sriastradh num_temp_gprs = 4; 21633464ebd5Sriastradh num_gs_gprs = 0; 21643464ebd5Sriastradh num_es_gprs = 0; 21653464ebd5Sriastradh num_ps_threads = 144; 21663464ebd5Sriastradh num_vs_threads = 40; 21673464ebd5Sriastradh num_gs_threads = 4; 21683464ebd5Sriastradh num_es_threads = 4; 21693464ebd5Sriastradh num_ps_stack_entries = 40; 21703464ebd5Sriastradh num_vs_stack_entries = 40; 21713464ebd5Sriastradh num_gs_stack_entries = 32; 21723464ebd5Sriastradh num_es_stack_entries = 16; 21733464ebd5Sriastradh break; 21743464ebd5Sriastradh case CHIP_RV610: 21753464ebd5Sriastradh case CHIP_RV620: 21763464ebd5Sriastradh case CHIP_RS780: 21773464ebd5Sriastradh case CHIP_RS880: 21783464ebd5Sriastradh default: 21793464ebd5Sriastradh num_ps_gprs = 84; 21803464ebd5Sriastradh num_vs_gprs = 36; 21813464ebd5Sriastradh num_temp_gprs = 4; 21823464ebd5Sriastradh num_gs_gprs = 0; 21833464ebd5Sriastradh num_es_gprs = 0; 218401e04c3fSmrg /* use limits 40 VS and at least 16 ES/GS */ 218501e04c3fSmrg num_ps_threads = 120; 218601e04c3fSmrg num_vs_threads = 40; 218701e04c3fSmrg num_gs_threads = 16; 218801e04c3fSmrg num_es_threads = 16; 21893464ebd5Sriastradh num_ps_stack_entries = 40; 21903464ebd5Sriastradh num_vs_stack_entries = 40; 21913464ebd5Sriastradh num_gs_stack_entries = 32; 21923464ebd5Sriastradh num_es_stack_entries = 16; 21933464ebd5Sriastradh break; 21943464ebd5Sriastradh case CHIP_RV670: 21953464ebd5Sriastradh num_ps_gprs = 144; 21963464ebd5Sriastradh num_vs_gprs = 40; 21973464ebd5Sriastradh num_temp_gprs = 4; 21983464ebd5Sriastradh num_gs_gprs = 0; 21993464ebd5Sriastradh num_es_gprs = 0; 22003464ebd5Sriastradh num_ps_threads = 136; 22013464ebd5Sriastradh num_vs_threads = 48; 22023464ebd5Sriastradh num_gs_threads = 4; 22033464ebd5Sriastradh num_es_threads = 4; 22043464ebd5Sriastradh num_ps_stack_entries = 40; 22053464ebd5Sriastradh num_vs_stack_entries = 40; 22063464ebd5Sriastradh num_gs_stack_entries = 32; 22073464ebd5Sriastradh num_es_stack_entries = 16; 22083464ebd5Sriastradh break; 22093464ebd5Sriastradh case CHIP_RV770: 2210af69d88dSmrg num_ps_gprs = 130; 22113464ebd5Sriastradh num_vs_gprs = 56; 22123464ebd5Sriastradh num_temp_gprs = 4; 2213af69d88dSmrg num_gs_gprs = 31; 2214af69d88dSmrg num_es_gprs = 31; 2215af69d88dSmrg num_ps_threads = 180; 22163464ebd5Sriastradh num_vs_threads = 60; 2217af69d88dSmrg num_gs_threads = 4; 2218af69d88dSmrg num_es_threads = 4; 2219af69d88dSmrg num_ps_stack_entries = 128; 2220af69d88dSmrg num_vs_stack_entries = 128; 2221af69d88dSmrg num_gs_stack_entries = 128; 2222af69d88dSmrg num_es_stack_entries = 128; 22233464ebd5Sriastradh break; 22243464ebd5Sriastradh case CHIP_RV730: 22253464ebd5Sriastradh case CHIP_RV740: 22263464ebd5Sriastradh num_ps_gprs = 84; 22273464ebd5Sriastradh num_vs_gprs = 36; 22283464ebd5Sriastradh num_temp_gprs = 4; 22293464ebd5Sriastradh num_gs_gprs = 0; 22303464ebd5Sriastradh num_es_gprs = 0; 2231af69d88dSmrg num_ps_threads = 180; 22323464ebd5Sriastradh num_vs_threads = 60; 2233af69d88dSmrg num_gs_threads = 4; 2234af69d88dSmrg num_es_threads = 4; 22353464ebd5Sriastradh num_ps_stack_entries = 128; 22363464ebd5Sriastradh num_vs_stack_entries = 128; 22373464ebd5Sriastradh num_gs_stack_entries = 0; 22383464ebd5Sriastradh num_es_stack_entries = 0; 22393464ebd5Sriastradh break; 22403464ebd5Sriastradh case CHIP_RV710: 22413464ebd5Sriastradh num_ps_gprs = 192; 22423464ebd5Sriastradh num_vs_gprs = 56; 22433464ebd5Sriastradh num_temp_gprs = 4; 22443464ebd5Sriastradh num_gs_gprs = 0; 22453464ebd5Sriastradh num_es_gprs = 0; 2246af69d88dSmrg num_ps_threads = 136; 22473464ebd5Sriastradh num_vs_threads = 48; 2248af69d88dSmrg num_gs_threads = 4; 2249af69d88dSmrg num_es_threads = 4; 22503464ebd5Sriastradh num_ps_stack_entries = 128; 22513464ebd5Sriastradh num_vs_stack_entries = 128; 22523464ebd5Sriastradh num_gs_stack_entries = 0; 22533464ebd5Sriastradh num_es_stack_entries = 0; 22543464ebd5Sriastradh break; 22553464ebd5Sriastradh } 22563464ebd5Sriastradh 225701e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_PS] = num_ps_gprs; 225801e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_VS] = num_vs_gprs; 225901e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_GS] = 0; 226001e04c3fSmrg rctx->default_gprs[R600_HW_STAGE_ES] = 0; 226101e04c3fSmrg 2262af69d88dSmrg rctx->r6xx_num_clause_temp_gprs = num_temp_gprs; 22633464ebd5Sriastradh 22643464ebd5Sriastradh /* SQ_CONFIG */ 22653464ebd5Sriastradh tmp = 0; 22663464ebd5Sriastradh switch (family) { 22673464ebd5Sriastradh case CHIP_RV610: 22683464ebd5Sriastradh case CHIP_RV620: 22693464ebd5Sriastradh case CHIP_RS780: 22703464ebd5Sriastradh case CHIP_RS880: 22713464ebd5Sriastradh case CHIP_RV710: 22723464ebd5Sriastradh break; 22733464ebd5Sriastradh default: 22743464ebd5Sriastradh tmp |= S_008C00_VC_ENABLE(1); 22753464ebd5Sriastradh break; 22763464ebd5Sriastradh } 22773464ebd5Sriastradh tmp |= S_008C00_DX9_CONSTS(0); 22783464ebd5Sriastradh tmp |= S_008C00_ALU_INST_PREFER_VECTOR(1); 22793464ebd5Sriastradh tmp |= S_008C00_PS_PRIO(ps_prio); 22803464ebd5Sriastradh tmp |= S_008C00_VS_PRIO(vs_prio); 22813464ebd5Sriastradh tmp |= S_008C00_GS_PRIO(gs_prio); 22823464ebd5Sriastradh tmp |= S_008C00_ES_PRIO(es_prio); 2283af69d88dSmrg r600_store_config_reg(cb, R_008C00_SQ_CONFIG, tmp); 22843464ebd5Sriastradh 22853464ebd5Sriastradh /* SQ_GPR_RESOURCE_MGMT_2 */ 2286af69d88dSmrg tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs); 22873464ebd5Sriastradh tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); 2288af69d88dSmrg r600_store_config_reg_seq(cb, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 4); 2289af69d88dSmrg r600_store_value(cb, tmp); 22903464ebd5Sriastradh 22913464ebd5Sriastradh /* SQ_THREAD_RESOURCE_MGMT */ 2292af69d88dSmrg tmp = S_008C0C_NUM_PS_THREADS(num_ps_threads); 22933464ebd5Sriastradh tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads); 22943464ebd5Sriastradh tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads); 22953464ebd5Sriastradh tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads); 2296af69d88dSmrg r600_store_value(cb, tmp); /* R_008C0C_SQ_THREAD_RESOURCE_MGMT */ 22973464ebd5Sriastradh 22983464ebd5Sriastradh /* SQ_STACK_RESOURCE_MGMT_1 */ 2299af69d88dSmrg tmp = S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); 23003464ebd5Sriastradh tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); 2301af69d88dSmrg r600_store_value(cb, tmp); /* R_008C10_SQ_STACK_RESOURCE_MGMT_1 */ 23023464ebd5Sriastradh 23033464ebd5Sriastradh /* SQ_STACK_RESOURCE_MGMT_2 */ 2304af69d88dSmrg tmp = S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); 23053464ebd5Sriastradh tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); 2306af69d88dSmrg r600_store_value(cb, tmp); /* R_008C14_SQ_STACK_RESOURCE_MGMT_2 */ 2307af69d88dSmrg 2308af69d88dSmrg r600_store_config_reg(cb, R_009714_VC_ENHANCE, 0); 2309af69d88dSmrg 2310af69d88dSmrg if (rctx->b.chip_class >= R700) { 231101e04c3fSmrg r600_store_context_reg(cb, R_028A50_VGT_ENHANCE, 4); 2312af69d88dSmrg r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000); 2313af69d88dSmrg r600_store_config_reg(cb, R_009830_DB_DEBUG, 0); 2314af69d88dSmrg r600_store_config_reg(cb, R_009838_DB_WATERMARKS, 0x00420204); 2315af69d88dSmrg r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0); 23163464ebd5Sriastradh } else { 2317af69d88dSmrg r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 2318af69d88dSmrg r600_store_config_reg(cb, R_009830_DB_DEBUG, 0x82000000); 2319af69d88dSmrg r600_store_config_reg(cb, R_009838_DB_WATERMARKS, 0x01020204); 2320af69d88dSmrg r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 1); 2321af69d88dSmrg } 2322af69d88dSmrg r600_store_context_reg_seq(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 9); 2323af69d88dSmrg r600_store_value(cb, 0); /* R_0288A8_SQ_ESGS_RING_ITEMSIZE */ 2324af69d88dSmrg r600_store_value(cb, 0); /* R_0288AC_SQ_GSVS_RING_ITEMSIZE */ 2325af69d88dSmrg r600_store_value(cb, 0); /* R_0288B0_SQ_ESTMP_RING_ITEMSIZE */ 2326af69d88dSmrg r600_store_value(cb, 0); /* R_0288B4_SQ_GSTMP_RING_ITEMSIZE */ 2327af69d88dSmrg r600_store_value(cb, 0); /* R_0288B8_SQ_VSTMP_RING_ITEMSIZE */ 2328af69d88dSmrg r600_store_value(cb, 0); /* R_0288BC_SQ_PSTMP_RING_ITEMSIZE */ 2329af69d88dSmrg r600_store_value(cb, 0); /* R_0288C0_SQ_FBUF_RING_ITEMSIZE */ 2330af69d88dSmrg r600_store_value(cb, 0); /* R_0288C4_SQ_REDUC_RING_ITEMSIZE */ 2331af69d88dSmrg r600_store_value(cb, 0); /* R_0288C8_SQ_GS_VERT_ITEMSIZE */ 2332af69d88dSmrg 2333af69d88dSmrg /* to avoid GPU doing any preloading of constant from random address */ 2334af69d88dSmrg r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 16); 2335af69d88dSmrg for (i = 0; i < 16; i++) 2336af69d88dSmrg r600_store_value(cb, 0); 2337af69d88dSmrg 2338af69d88dSmrg r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 16); 2339af69d88dSmrg for (i = 0; i < 16; i++) 2340af69d88dSmrg r600_store_value(cb, 0); 2341af69d88dSmrg 2342af69d88dSmrg r600_store_context_reg_seq(cb, R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 16); 2343af69d88dSmrg for (i = 0; i < 16; i++) 2344af69d88dSmrg r600_store_value(cb, 0); 2345af69d88dSmrg 2346af69d88dSmrg r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13); 2347af69d88dSmrg r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */ 2348af69d88dSmrg r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */ 2349af69d88dSmrg r600_store_value(cb, 0); /* R_028A18_VGT_HOS_MAX_TESS_LEVEL */ 2350af69d88dSmrg r600_store_value(cb, 0); /* R_028A1C_VGT_HOS_MIN_TESS_LEVEL */ 2351af69d88dSmrg r600_store_value(cb, 0); /* R_028A20_VGT_HOS_REUSE_DEPTH */ 2352af69d88dSmrg r600_store_value(cb, 0); /* R_028A24_VGT_GROUP_PRIM_TYPE */ 2353af69d88dSmrg r600_store_value(cb, 0); /* R_028A28_VGT_GROUP_FIRST_DECR */ 2354af69d88dSmrg r600_store_value(cb, 0); /* R_028A2C_VGT_GROUP_DECR */ 2355af69d88dSmrg r600_store_value(cb, 0); /* R_028A30_VGT_GROUP_VECT_0_CNTL */ 2356af69d88dSmrg r600_store_value(cb, 0); /* R_028A34_VGT_GROUP_VECT_1_CNTL */ 2357af69d88dSmrg r600_store_value(cb, 0); /* R_028A38_VGT_GROUP_VECT_0_FMT_CNTL */ 2358af69d88dSmrg r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */ 2359af69d88dSmrg r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE, 0); */ 2360af69d88dSmrg 2361af69d88dSmrg r600_store_context_reg(cb, R_028A84_VGT_PRIMITIVEID_EN, 0); 2362af69d88dSmrg r600_store_context_reg(cb, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0); 2363af69d88dSmrg r600_store_context_reg(cb, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0); 2364af69d88dSmrg 2365af69d88dSmrg r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2); 2366af69d88dSmrg r600_store_value(cb, 1); /* R_028AB4_VGT_REUSE_OFF */ 2367af69d88dSmrg r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */ 2368af69d88dSmrg 2369af69d88dSmrg r600_store_context_reg(cb, R_028B20_VGT_STRMOUT_BUFFER_EN, 0); 2370af69d88dSmrg 2371af69d88dSmrg r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); 2372af69d88dSmrg 2373af69d88dSmrg r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0); 2374af69d88dSmrg 2375af69d88dSmrg r600_store_context_reg_seq(cb, R_0286DC_SPI_FOG_CNTL, 3); 2376af69d88dSmrg r600_store_value(cb, 0); /* R_0286DC_SPI_FOG_CNTL */ 2377af69d88dSmrg r600_store_value(cb, 0); /* R_0286E0_SPI_FOG_FUNC_SCALE */ 2378af69d88dSmrg r600_store_value(cb, 0); /* R_0286E4_SPI_FOG_FUNC_BIAS */ 2379af69d88dSmrg 2380af69d88dSmrg r600_store_context_reg_seq(cb, R_028D28_DB_SRESULTS_COMPARE_STATE0, 3); 2381af69d88dSmrg r600_store_value(cb, 0); /* R_028D28_DB_SRESULTS_COMPARE_STATE0 */ 2382af69d88dSmrg r600_store_value(cb, 0); /* R_028D2C_DB_SRESULTS_COMPARE_STATE1 */ 2383af69d88dSmrg r600_store_value(cb, 0); /* R_028D30_DB_PRELOAD_CONTROL */ 2384af69d88dSmrg 2385af69d88dSmrg r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0); 2386af69d88dSmrg r600_store_context_reg(cb, R_028A48_PA_SC_MPASS_PS_CNTL, 0); 2387af69d88dSmrg 2388af69d88dSmrg r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); 2389af69d88dSmrg r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 2390af69d88dSmrg 2391af69d88dSmrg if (rctx->b.chip_class >= R700) { 2392af69d88dSmrg r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 2393af69d88dSmrg } 2394af69d88dSmrg 2395af69d88dSmrg r600_store_context_reg_seq(cb, R_028C30_CB_CLRCMP_CONTROL, 4); 2396af69d88dSmrg r600_store_value(cb, 0x1000000); /* R_028C30_CB_CLRCMP_CONTROL */ 2397af69d88dSmrg r600_store_value(cb, 0); /* R_028C34_CB_CLRCMP_SRC */ 2398af69d88dSmrg r600_store_value(cb, 0xFF); /* R_028C38_CB_CLRCMP_DST */ 2399af69d88dSmrg r600_store_value(cb, 0xFFFFFFFF); /* R_028C3C_CB_CLRCMP_MSK */ 2400af69d88dSmrg 2401af69d88dSmrg r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2); 2402af69d88dSmrg r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */ 2403af69d88dSmrg r600_store_value(cb, S_028034_BR_X(8192) | S_028034_BR_Y(8192)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */ 2404af69d88dSmrg 2405af69d88dSmrg r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2); 2406af69d88dSmrg r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */ 2407af69d88dSmrg r600_store_value(cb, S_028244_BR_X(8192) | S_028244_BR_Y(8192)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */ 2408af69d88dSmrg 2409af69d88dSmrg r600_store_context_reg_seq(cb, R_0288CC_SQ_PGM_CF_OFFSET_PS, 5); 2410af69d88dSmrg r600_store_value(cb, 0); /* R_0288CC_SQ_PGM_CF_OFFSET_PS */ 2411af69d88dSmrg r600_store_value(cb, 0); /* R_0288D0_SQ_PGM_CF_OFFSET_VS */ 2412af69d88dSmrg r600_store_value(cb, 0); /* R_0288D4_SQ_PGM_CF_OFFSET_GS */ 2413af69d88dSmrg r600_store_value(cb, 0); /* R_0288D8_SQ_PGM_CF_OFFSET_ES */ 2414af69d88dSmrg r600_store_value(cb, 0); /* R_0288DC_SQ_PGM_CF_OFFSET_FS */ 2415af69d88dSmrg 2416af69d88dSmrg r600_store_context_reg(cb, R_0288E0_SQ_VTX_SEMANTIC_CLEAR, ~0); 2417af69d88dSmrg 2418af69d88dSmrg r600_store_context_reg_seq(cb, R_028400_VGT_MAX_VTX_INDX, 2); 2419af69d88dSmrg r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */ 2420af69d88dSmrg r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */ 2421af69d88dSmrg 2422af69d88dSmrg r600_store_context_reg(cb, R_0288A4_SQ_PGM_RESOURCES_FS, 0); 2423af69d88dSmrg 2424af69d88dSmrg if (rctx->b.chip_class == R700) 2425af69d88dSmrg r600_store_context_reg(cb, R_028350_SX_MISC, 0); 2426af69d88dSmrg if (rctx->b.chip_class == R700 && rctx->screen->b.has_streamout) 2427af69d88dSmrg r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf)); 2428af69d88dSmrg 2429af69d88dSmrg r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0); 2430af69d88dSmrg if (rctx->screen->b.has_streamout) { 2431af69d88dSmrg r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 2432af69d88dSmrg } 2433af69d88dSmrg 2434af69d88dSmrg r600_store_loop_const(cb, R_03E200_SQ_LOOP_CONST_0, 0x1000FFF); 2435af69d88dSmrg r600_store_loop_const(cb, R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x1000FFF); 2436af69d88dSmrg r600_store_loop_const(cb, R_03E200_SQ_LOOP_CONST_0 + (64 * 4), 0x1000FFF); 2437af69d88dSmrg} 2438af69d88dSmrg 2439af69d88dSmrgvoid r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 2440af69d88dSmrg{ 2441af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 2442af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 24433464ebd5Sriastradh struct r600_shader *rshader = &shader->shader; 24443464ebd5Sriastradh unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; 244501e04c3fSmrg int pos_index = -1, face_index = -1, fixed_pt_position_index = -1; 2446af69d88dSmrg unsigned tmp, sid, ufi = 0; 2447af69d88dSmrg int need_linear = 0; 244801e04c3fSmrg unsigned z_export = 0, stencil_export = 0, mask_export = 0; 2449af69d88dSmrg unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; 24503464ebd5Sriastradh 2451af69d88dSmrg if (!cb->buf) { 2452af69d88dSmrg r600_init_command_buffer(cb, 64); 2453af69d88dSmrg } else { 2454af69d88dSmrg cb->num_dw = 0; 2455af69d88dSmrg } 24563464ebd5Sriastradh 2457af69d88dSmrg r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, rshader->ninput); 24583464ebd5Sriastradh for (i = 0; i < rshader->ninput; i++) { 24593464ebd5Sriastradh if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 24603464ebd5Sriastradh pos_index = i; 246101e04c3fSmrg if (rshader->input[i].name == TGSI_SEMANTIC_FACE && face_index == -1) 24623464ebd5Sriastradh face_index = i; 246301e04c3fSmrg if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) 246401e04c3fSmrg fixed_pt_position_index = i; 2465af69d88dSmrg 2466af69d88dSmrg sid = rshader->input[i].spi_sid; 2467af69d88dSmrg 2468af69d88dSmrg tmp = S_028644_SEMANTIC(sid); 2469af69d88dSmrg 247001e04c3fSmrg /* D3D 9 behaviour. GL is undefined */ 247101e04c3fSmrg if (rshader->input[i].name == TGSI_SEMANTIC_COLOR && rshader->input[i].sid == 0) 247201e04c3fSmrg tmp |= S_028644_DEFAULT_VAL(3); 247301e04c3fSmrg 2474af69d88dSmrg if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || 2475af69d88dSmrg rshader->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || 2476af69d88dSmrg (rshader->input[i].interpolate == TGSI_INTERPOLATE_COLOR && 2477af69d88dSmrg rctx->rasterizer && rctx->rasterizer->flatshade)) 2478af69d88dSmrg tmp |= S_028644_FLAT_SHADE(1); 2479af69d88dSmrg 24807ec681f3Smrg if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD || 24817ec681f3Smrg (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD && 24827ec681f3Smrg sprite_coord_enable & (1 << rshader->input[i].sid))) { 2483af69d88dSmrg tmp |= S_028644_PT_SPRITE_TEX(1); 2484af69d88dSmrg } 2485af69d88dSmrg 248601e04c3fSmrg if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) 2487af69d88dSmrg tmp |= S_028644_SEL_CENTROID(1); 2488af69d88dSmrg 248901e04c3fSmrg if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE) 249001e04c3fSmrg tmp |= S_028644_SEL_SAMPLE(1); 249101e04c3fSmrg 2492af69d88dSmrg if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) { 2493af69d88dSmrg need_linear = 1; 2494af69d88dSmrg tmp |= S_028644_SEL_LINEAR(1); 2495af69d88dSmrg } 2496af69d88dSmrg 2497af69d88dSmrg r600_store_value(cb, tmp); 24983464ebd5Sriastradh } 24993464ebd5Sriastradh 25003464ebd5Sriastradh db_shader_control = 0; 25013464ebd5Sriastradh for (i = 0; i < rshader->noutput; i++) { 25023464ebd5Sriastradh if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 2503af69d88dSmrg z_export = 1; 25043464ebd5Sriastradh if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 2505af69d88dSmrg stencil_export = 1; 250601e04c3fSmrg if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK && 250701e04c3fSmrg rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) 250801e04c3fSmrg mask_export = 1; 25093464ebd5Sriastradh } 2510af69d88dSmrg db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export); 2511af69d88dSmrg db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(stencil_export); 251201e04c3fSmrg db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export); 25133464ebd5Sriastradh if (rshader->uses_kill) 25143464ebd5Sriastradh db_shader_control |= S_02880C_KILL_ENABLE(1); 25153464ebd5Sriastradh 25163464ebd5Sriastradh exports_ps = 0; 25173464ebd5Sriastradh for (i = 0; i < rshader->noutput; i++) { 25183464ebd5Sriastradh if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || 251901e04c3fSmrg rshader->output[i].name == TGSI_SEMANTIC_STENCIL || 252001e04c3fSmrg rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 25213464ebd5Sriastradh exports_ps |= 1; 25223464ebd5Sriastradh } 25233464ebd5Sriastradh } 2524af69d88dSmrg num_cout = rshader->nr_ps_color_exports; 25253464ebd5Sriastradh exports_ps |= S_028854_EXPORT_COLORS(num_cout); 25263464ebd5Sriastradh if (!exports_ps) { 25273464ebd5Sriastradh /* always at least export 1 component per pixel */ 25283464ebd5Sriastradh exports_ps = 2; 25293464ebd5Sriastradh } 25303464ebd5Sriastradh 2531af69d88dSmrg shader->nr_ps_color_outputs = num_cout; 253201e04c3fSmrg shader->ps_color_export_mask = rshader->ps_color_export_mask; 2533af69d88dSmrg 25343464ebd5Sriastradh spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 2535af69d88dSmrg S_0286CC_PERSP_GRADIENT_ENA(1)| 2536af69d88dSmrg S_0286CC_LINEAR_GRADIENT_ENA(need_linear); 25373464ebd5Sriastradh spi_input_z = 0; 25383464ebd5Sriastradh if (pos_index != -1) { 25393464ebd5Sriastradh spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 254001e04c3fSmrg S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) | 25413464ebd5Sriastradh S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 254201e04c3fSmrg S_0286CC_BARYC_SAMPLE_CNTL(1)) | 254301e04c3fSmrg S_0286CC_POSITION_SAMPLE(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE); 2544af69d88dSmrg spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1); 25453464ebd5Sriastradh } 25463464ebd5Sriastradh 25473464ebd5Sriastradh spi_ps_in_control_1 = 0; 25483464ebd5Sriastradh if (face_index != -1) { 25493464ebd5Sriastradh spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 25503464ebd5Sriastradh S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 25513464ebd5Sriastradh } 255201e04c3fSmrg if (fixed_pt_position_index != -1) { 255301e04c3fSmrg spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) | 255401e04c3fSmrg S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr); 255501e04c3fSmrg } 25563464ebd5Sriastradh 2557af69d88dSmrg /* HW bug in original R600 */ 2558af69d88dSmrg if (rctx->b.family == CHIP_R600) 2559af69d88dSmrg ufi = 1; 2560af69d88dSmrg 2561af69d88dSmrg r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2); 2562af69d88dSmrg r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */ 2563af69d88dSmrg r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */ 2564af69d88dSmrg 2565af69d88dSmrg r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z); 2566af69d88dSmrg 2567af69d88dSmrg r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2); 2568af69d88dSmrg r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/ 2569af69d88dSmrg S_028850_NUM_GPRS(rshader->bc.ngpr) | 257001e04c3fSmrg /* 257101e04c3fSmrg * docs are misleading about the dx10_clamp bit. This only affects 257201e04c3fSmrg * instructions using CLAMP dst modifier, in which case they will 257301e04c3fSmrg * return 0 with this set for a NaN (otherwise NaN). 257401e04c3fSmrg */ 257501e04c3fSmrg S_028850_DX10_CLAMP(1) | 2576af69d88dSmrg S_028850_STACK_SIZE(rshader->bc.nstack) | 2577af69d88dSmrg S_028850_UNCACHED_FIRST_INST(ufi)); 2578af69d88dSmrg r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */ 2579af69d88dSmrg 2580af69d88dSmrg r600_store_context_reg(cb, R_028840_SQ_PGM_START_PS, 0); 2581af69d88dSmrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ 2582af69d88dSmrg 25833464ebd5Sriastradh /* only set some bits here, the other bits are set in the dsa state */ 2584af69d88dSmrg shader->db_shader_control = db_shader_control; 258501e04c3fSmrg shader->ps_depth_export = z_export | stencil_export | mask_export; 25863464ebd5Sriastradh 2587af69d88dSmrg shader->sprite_coord_enable = sprite_coord_enable; 2588af69d88dSmrg if (rctx->rasterizer) 2589af69d88dSmrg shader->flatshade = rctx->rasterizer->flatshade; 25903464ebd5Sriastradh} 25913464ebd5Sriastradh 2592af69d88dSmrgvoid r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 25933464ebd5Sriastradh{ 2594af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 25953464ebd5Sriastradh struct r600_shader *rshader = &shader->shader; 2596af69d88dSmrg unsigned spi_vs_out_id[10] = {}; 2597af69d88dSmrg unsigned i, tmp, nparams = 0; 25983464ebd5Sriastradh 2599af69d88dSmrg for (i = 0; i < rshader->noutput; i++) { 2600af69d88dSmrg if (rshader->output[i].spi_sid) { 2601af69d88dSmrg tmp = rshader->output[i].spi_sid << ((nparams & 3) * 8); 2602af69d88dSmrg spi_vs_out_id[nparams / 4] |= tmp; 2603af69d88dSmrg nparams++; 2604af69d88dSmrg } 26053464ebd5Sriastradh } 2606af69d88dSmrg 2607af69d88dSmrg r600_init_command_buffer(cb, 32); 2608af69d88dSmrg 2609af69d88dSmrg r600_store_context_reg_seq(cb, R_028614_SPI_VS_OUT_ID_0, 10); 26103464ebd5Sriastradh for (i = 0; i < 10; i++) { 2611af69d88dSmrg r600_store_value(cb, spi_vs_out_id[i]); 26123464ebd5Sriastradh } 26133464ebd5Sriastradh 26143464ebd5Sriastradh /* Certain attributes (position, psize, etc.) don't count as params. 26153464ebd5Sriastradh * VS is required to export at least one param and r600_shader_from_tgsi() 26163464ebd5Sriastradh * takes care of adding a dummy export. 26173464ebd5Sriastradh */ 26183464ebd5Sriastradh if (nparams < 1) 26193464ebd5Sriastradh nparams = 1; 26203464ebd5Sriastradh 2621af69d88dSmrg r600_store_context_reg(cb, R_0286C4_SPI_VS_OUT_CONFIG, 2622af69d88dSmrg S_0286C4_VS_EXPORT_COUNT(nparams - 1)); 2623af69d88dSmrg r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS, 2624af69d88dSmrg S_028868_NUM_GPRS(rshader->bc.ngpr) | 262501e04c3fSmrg S_028868_DX10_CLAMP(1) | 2626af69d88dSmrg S_028868_STACK_SIZE(rshader->bc.nstack)); 2627af69d88dSmrg if (rshader->vs_position_window_space) { 2628af69d88dSmrg r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 2629af69d88dSmrg S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1)); 2630af69d88dSmrg } else { 2631af69d88dSmrg r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 2632af69d88dSmrg S_028818_VTX_W0_FMT(1) | 2633af69d88dSmrg S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | 2634af69d88dSmrg S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | 2635af69d88dSmrg S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); 2636af69d88dSmrg 2637af69d88dSmrg } 2638af69d88dSmrg r600_store_context_reg(cb, R_028858_SQ_PGM_START_VS, 0); 2639af69d88dSmrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ 2640af69d88dSmrg 2641af69d88dSmrg shader->pa_cl_vs_out_cntl = 2642af69d88dSmrg S_02881C_VS_OUT_CCDIST0_VEC_ENA((rshader->clip_dist_write & 0x0F) != 0) | 2643af69d88dSmrg S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->clip_dist_write & 0xF0) != 0) | 2644af69d88dSmrg S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) | 2645af69d88dSmrg S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size) | 2646af69d88dSmrg S_02881C_USE_VTX_EDGE_FLAG(rshader->vs_out_edgeflag) | 2647af69d88dSmrg S_02881C_USE_VTX_RENDER_TARGET_INDX(rshader->vs_out_layer) | 2648af69d88dSmrg S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport); 2649af69d88dSmrg} 2650af69d88dSmrg 265101e04c3fSmrg#define RV610_GSVS_ALIGN 32 265201e04c3fSmrg#define R600_GSVS_ALIGN 16 265301e04c3fSmrg 2654af69d88dSmrgvoid r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 2655af69d88dSmrg{ 2656af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 2657af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 2658af69d88dSmrg struct r600_shader *rshader = &shader->shader; 2659af69d88dSmrg struct r600_shader *cp_shader = &shader->gs_copy_shader->shader; 2660af69d88dSmrg unsigned gsvs_itemsize = 266101e04c3fSmrg (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2; 266201e04c3fSmrg 266301e04c3fSmrg /* some r600s needs gsvs itemsize aligned to cacheline size 266401e04c3fSmrg this was fixed in rs780 and above. */ 266501e04c3fSmrg switch (rctx->b.family) { 266601e04c3fSmrg case CHIP_RV610: 266701e04c3fSmrg gsvs_itemsize = align(gsvs_itemsize, RV610_GSVS_ALIGN); 266801e04c3fSmrg break; 266901e04c3fSmrg case CHIP_R600: 267001e04c3fSmrg case CHIP_RV630: 267101e04c3fSmrg case CHIP_RV670: 267201e04c3fSmrg case CHIP_RV620: 267301e04c3fSmrg case CHIP_RV635: 267401e04c3fSmrg gsvs_itemsize = align(gsvs_itemsize, R600_GSVS_ALIGN); 267501e04c3fSmrg break; 267601e04c3fSmrg default: 267701e04c3fSmrg break; 267801e04c3fSmrg } 2679af69d88dSmrg 2680af69d88dSmrg r600_init_command_buffer(cb, 64); 2681af69d88dSmrg 2682af69d88dSmrg /* VGT_GS_MODE is written by r600_emit_shader_stages */ 2683af69d88dSmrg r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1); 2684af69d88dSmrg 2685af69d88dSmrg if (rctx->b.chip_class >= R700) { 2686af69d88dSmrg r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT, 268701e04c3fSmrg S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices)); 2688af69d88dSmrg } 2689af69d88dSmrg r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE, 269001e04c3fSmrg r600_conv_prim_to_gs_out(shader->selector->gs_output_prim)); 2691af69d88dSmrg 269201e04c3fSmrg r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE, 269301e04c3fSmrg cp_shader->ring_item_sizes[0] >> 2); 2694af69d88dSmrg 2695af69d88dSmrg r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 269601e04c3fSmrg (rshader->ring_item_sizes[0]) >> 2); 2697af69d88dSmrg 2698af69d88dSmrg r600_store_context_reg(cb, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 2699af69d88dSmrg gsvs_itemsize); 2700af69d88dSmrg 2701af69d88dSmrg /* FIXME calculate these values somehow ??? */ 2702af69d88dSmrg r600_store_config_reg_seq(cb, R_0088C8_VGT_GS_PER_ES, 2); 2703af69d88dSmrg r600_store_value(cb, 0x80); /* GS_PER_ES */ 2704af69d88dSmrg r600_store_value(cb, 0x100); /* ES_PER_GS */ 2705af69d88dSmrg r600_store_config_reg_seq(cb, R_0088E8_VGT_GS_PER_VS, 1); 2706af69d88dSmrg r600_store_value(cb, 0x2); /* GS_PER_VS */ 2707af69d88dSmrg 2708af69d88dSmrg r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_GS, 2709af69d88dSmrg S_02887C_NUM_GPRS(rshader->bc.ngpr) | 271001e04c3fSmrg S_02887C_DX10_CLAMP(1) | 2711af69d88dSmrg S_02887C_STACK_SIZE(rshader->bc.nstack)); 2712af69d88dSmrg r600_store_context_reg(cb, R_02886C_SQ_PGM_START_GS, 0); 2713af69d88dSmrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ 2714af69d88dSmrg} 2715af69d88dSmrg 2716af69d88dSmrgvoid r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) 2717af69d88dSmrg{ 2718af69d88dSmrg struct r600_command_buffer *cb = &shader->command_buffer; 2719af69d88dSmrg struct r600_shader *rshader = &shader->shader; 2720af69d88dSmrg 2721af69d88dSmrg r600_init_command_buffer(cb, 32); 2722af69d88dSmrg 2723af69d88dSmrg r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES, 2724af69d88dSmrg S_028890_NUM_GPRS(rshader->bc.ngpr) | 272501e04c3fSmrg S_028890_DX10_CLAMP(1) | 2726af69d88dSmrg S_028890_STACK_SIZE(rshader->bc.nstack)); 2727af69d88dSmrg r600_store_context_reg(cb, R_028880_SQ_PGM_START_ES, 0); 2728af69d88dSmrg /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ 2729af69d88dSmrg} 2730af69d88dSmrg 2731af69d88dSmrg 2732af69d88dSmrgvoid *r600_create_resolve_blend(struct r600_context *rctx) 2733af69d88dSmrg{ 2734af69d88dSmrg struct pipe_blend_state blend; 2735af69d88dSmrg unsigned i; 2736af69d88dSmrg 2737af69d88dSmrg memset(&blend, 0, sizeof(blend)); 2738af69d88dSmrg blend.independent_blend_enable = true; 2739af69d88dSmrg for (i = 0; i < 2; i++) { 2740af69d88dSmrg blend.rt[i].colormask = 0xf; 2741af69d88dSmrg blend.rt[i].blend_enable = 1; 2742af69d88dSmrg blend.rt[i].rgb_func = PIPE_BLEND_ADD; 2743af69d88dSmrg blend.rt[i].alpha_func = PIPE_BLEND_ADD; 2744af69d88dSmrg blend.rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ZERO; 2745af69d88dSmrg blend.rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; 2746af69d88dSmrg blend.rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ZERO; 2747af69d88dSmrg blend.rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; 2748af69d88dSmrg } 2749af69d88dSmrg return r600_create_blend_state_mode(&rctx->b.b, &blend, V_028808_SPECIAL_RESOLVE_BOX); 2750af69d88dSmrg} 2751af69d88dSmrg 2752af69d88dSmrgvoid *r700_create_resolve_blend(struct r600_context *rctx) 2753af69d88dSmrg{ 2754af69d88dSmrg struct pipe_blend_state blend; 2755af69d88dSmrg 2756af69d88dSmrg memset(&blend, 0, sizeof(blend)); 2757af69d88dSmrg blend.independent_blend_enable = true; 2758af69d88dSmrg blend.rt[0].colormask = 0xf; 2759af69d88dSmrg return r600_create_blend_state_mode(&rctx->b.b, &blend, V_028808_SPECIAL_RESOLVE_BOX); 2760af69d88dSmrg} 2761af69d88dSmrg 2762af69d88dSmrgvoid *r600_create_decompress_blend(struct r600_context *rctx) 2763af69d88dSmrg{ 2764af69d88dSmrg struct pipe_blend_state blend; 2765af69d88dSmrg 2766af69d88dSmrg memset(&blend, 0, sizeof(blend)); 2767af69d88dSmrg blend.independent_blend_enable = true; 2768af69d88dSmrg blend.rt[0].colormask = 0xf; 2769af69d88dSmrg return r600_create_blend_state_mode(&rctx->b.b, &blend, V_028808_SPECIAL_EXPAND_SAMPLES); 2770af69d88dSmrg} 2771af69d88dSmrg 2772af69d88dSmrgvoid *r600_create_db_flush_dsa(struct r600_context *rctx) 27733464ebd5Sriastradh{ 27743464ebd5Sriastradh struct pipe_depth_stencil_alpha_state dsa; 27753464ebd5Sriastradh boolean quirk = false; 27763464ebd5Sriastradh 2777af69d88dSmrg if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 || 2778af69d88dSmrg rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RV635) 27793464ebd5Sriastradh quirk = true; 27803464ebd5Sriastradh 27813464ebd5Sriastradh memset(&dsa, 0, sizeof(dsa)); 27823464ebd5Sriastradh 27833464ebd5Sriastradh if (quirk) { 27847ec681f3Smrg dsa.depth_enabled = 1; 27857ec681f3Smrg dsa.depth_func = PIPE_FUNC_LEQUAL; 27863464ebd5Sriastradh dsa.stencil[0].enabled = 1; 27873464ebd5Sriastradh dsa.stencil[0].func = PIPE_FUNC_ALWAYS; 27883464ebd5Sriastradh dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; 27893464ebd5Sriastradh dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_INCR; 27903464ebd5Sriastradh dsa.stencil[0].writemask = 0xff; 27913464ebd5Sriastradh } 27923464ebd5Sriastradh 2793af69d88dSmrg return rctx->b.b.create_depth_stencil_alpha_state(&rctx->b.b, &dsa); 2794af69d88dSmrg} 2795af69d88dSmrg 2796af69d88dSmrgvoid r600_update_db_shader_control(struct r600_context * rctx) 2797af69d88dSmrg{ 2798af69d88dSmrg bool dual_export; 2799af69d88dSmrg unsigned db_shader_control; 280001e04c3fSmrg uint8_t ps_conservative_z; 2801af69d88dSmrg 2802af69d88dSmrg if (!rctx->ps_shader) { 2803af69d88dSmrg return; 2804af69d88dSmrg } 2805af69d88dSmrg 2806af69d88dSmrg dual_export = rctx->framebuffer.export_16bpc && 2807af69d88dSmrg !rctx->ps_shader->current->ps_depth_export; 2808af69d88dSmrg 2809af69d88dSmrg db_shader_control = rctx->ps_shader->current->db_shader_control | 2810af69d88dSmrg S_02880C_DUAL_EXPORT_ENABLE(dual_export); 2811af69d88dSmrg 281201e04c3fSmrg ps_conservative_z = rctx->ps_shader->current->shader.ps_conservative_z; 281301e04c3fSmrg 2814af69d88dSmrg /* When alpha test is enabled we can't trust the hw to make the proper 2815af69d88dSmrg * decision on the order in which ztest should be run related to fragment 2816af69d88dSmrg * shader execution. 2817af69d88dSmrg * 2818af69d88dSmrg * If alpha test is enabled perform z test after fragment. RE_Z (early 2819af69d88dSmrg * z test but no write to the zbuffer) seems to cause lockup on r6xx/r7xx 2820af69d88dSmrg */ 2821af69d88dSmrg if (rctx->alphatest_state.sx_alpha_test_control) { 2822af69d88dSmrg db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 2823af69d88dSmrg } else { 2824af69d88dSmrg db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); 2825af69d88dSmrg } 2826af69d88dSmrg 282701e04c3fSmrg if (db_shader_control != rctx->db_misc_state.db_shader_control || 282801e04c3fSmrg ps_conservative_z != rctx->db_misc_state.ps_conservative_z) { 2829af69d88dSmrg rctx->db_misc_state.db_shader_control = db_shader_control; 283001e04c3fSmrg rctx->db_misc_state.ps_conservative_z = ps_conservative_z; 283101e04c3fSmrg r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); 2832af69d88dSmrg } 2833af69d88dSmrg} 2834af69d88dSmrg 283501e04c3fSmrgstatic inline unsigned r600_array_mode(unsigned mode) 2836af69d88dSmrg{ 2837af69d88dSmrg switch (mode) { 283801e04c3fSmrg default: 2839af69d88dSmrg case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_0280A0_ARRAY_LINEAR_ALIGNED; 2840af69d88dSmrg break; 2841af69d88dSmrg case RADEON_SURF_MODE_1D: return V_0280A0_ARRAY_1D_TILED_THIN1; 2842af69d88dSmrg break; 2843af69d88dSmrg case RADEON_SURF_MODE_2D: return V_0280A0_ARRAY_2D_TILED_THIN1; 2844af69d88dSmrg } 2845af69d88dSmrg} 2846af69d88dSmrg 2847af69d88dSmrgstatic boolean r600_dma_copy_tile(struct r600_context *rctx, 2848af69d88dSmrg struct pipe_resource *dst, 2849af69d88dSmrg unsigned dst_level, 2850af69d88dSmrg unsigned dst_x, 2851af69d88dSmrg unsigned dst_y, 2852af69d88dSmrg unsigned dst_z, 2853af69d88dSmrg struct pipe_resource *src, 2854af69d88dSmrg unsigned src_level, 2855af69d88dSmrg unsigned src_x, 2856af69d88dSmrg unsigned src_y, 2857af69d88dSmrg unsigned src_z, 2858af69d88dSmrg unsigned copy_height, 2859af69d88dSmrg unsigned pitch, 2860af69d88dSmrg unsigned bpp) 2861af69d88dSmrg{ 28627ec681f3Smrg struct radeon_cmdbuf *cs = &rctx->b.dma.cs; 2863af69d88dSmrg struct r600_texture *rsrc = (struct r600_texture*)src; 2864af69d88dSmrg struct r600_texture *rdst = (struct r600_texture*)dst; 2865af69d88dSmrg unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; 2866af69d88dSmrg unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode; 2867af69d88dSmrg uint64_t base, addr; 2868af69d88dSmrg 286901e04c3fSmrg dst_mode = rdst->surface.u.legacy.level[dst_level].mode; 287001e04c3fSmrg src_mode = rsrc->surface.u.legacy.level[src_level].mode; 2871af69d88dSmrg assert(dst_mode != src_mode); 2872af69d88dSmrg 2873af69d88dSmrg y = 0; 2874af69d88dSmrg lbpp = util_logbase2(bpp); 2875af69d88dSmrg pitch_tile_max = ((pitch / bpp) / 8) - 1; 2876af69d88dSmrg 287701e04c3fSmrg if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED) { 2878af69d88dSmrg /* T2L */ 2879af69d88dSmrg array_mode = r600_array_mode(src_mode); 288001e04c3fSmrg slice_tile_max = (rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.u.legacy.level[src_level].nblk_y) / (8*8); 2881af69d88dSmrg slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; 2882af69d88dSmrg /* linear height must be the same as the slice tile max height, it's ok even 2883af69d88dSmrg * if the linear destination/source have smaller heigh as the size of the 2884af69d88dSmrg * dma packet will be using the copy_height which is always smaller or equal 2885af69d88dSmrg * to the linear height 2886af69d88dSmrg */ 288701e04c3fSmrg height = u_minify(rsrc->resource.b.b.height0, src_level); 2888af69d88dSmrg detile = 1; 2889af69d88dSmrg x = src_x; 2890af69d88dSmrg y = src_y; 2891af69d88dSmrg z = src_z; 28927ec681f3Smrg base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; 28937ec681f3Smrg addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; 289401e04c3fSmrg addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; 2895af69d88dSmrg addr += dst_y * pitch + dst_x * bpp; 2896af69d88dSmrg } else { 2897af69d88dSmrg /* L2T */ 2898af69d88dSmrg array_mode = r600_array_mode(dst_mode); 289901e04c3fSmrg slice_tile_max = (rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.u.legacy.level[dst_level].nblk_y) / (8*8); 2900af69d88dSmrg slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; 2901af69d88dSmrg /* linear height must be the same as the slice tile max height, it's ok even 2902af69d88dSmrg * if the linear destination/source have smaller heigh as the size of the 2903af69d88dSmrg * dma packet will be using the copy_height which is always smaller or equal 2904af69d88dSmrg * to the linear height 2905af69d88dSmrg */ 290601e04c3fSmrg height = u_minify(rdst->resource.b.b.height0, dst_level); 2907af69d88dSmrg detile = 0; 2908af69d88dSmrg x = dst_x; 2909af69d88dSmrg y = dst_y; 2910af69d88dSmrg z = dst_z; 29117ec681f3Smrg base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; 29127ec681f3Smrg addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; 291301e04c3fSmrg addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z; 2914af69d88dSmrg addr += src_y * pitch + src_x * bpp; 2915af69d88dSmrg } 2916af69d88dSmrg /* check that we are in dw/base alignment constraint */ 2917af69d88dSmrg if (addr % 4 || base % 256) { 2918af69d88dSmrg return FALSE; 2919af69d88dSmrg } 2920af69d88dSmrg 2921af69d88dSmrg /* It's a r6xx/r7xx limitation, the blit must be on 8 boundary for number 2922af69d88dSmrg * line in the blit. Compute max 8 line we can copy in the size limit 2923af69d88dSmrg */ 2924af69d88dSmrg cheight = ((R600_DMA_COPY_MAX_SIZE_DW * 4) / pitch) & 0xfffffff8; 2925af69d88dSmrg ncopy = (copy_height / cheight) + !!(copy_height % cheight); 292601e04c3fSmrg r600_need_dma_space(&rctx->b, ncopy * 7, &rdst->resource, &rsrc->resource); 2927af69d88dSmrg 2928af69d88dSmrg for (i = 0; i < ncopy; i++) { 2929af69d88dSmrg cheight = cheight > copy_height ? copy_height : cheight; 2930af69d88dSmrg size = (cheight * pitch) / 4; 293101e04c3fSmrg /* emit reloc before writing cs so that cs is always in consistent state */ 293201e04c3fSmrg radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, 0); 293301e04c3fSmrg radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, RADEON_USAGE_WRITE, 0); 293401e04c3fSmrg radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, 1, 0, size)); 293501e04c3fSmrg radeon_emit(cs, base >> 8); 293601e04c3fSmrg radeon_emit(cs, (detile << 31) | (array_mode << 27) | 293701e04c3fSmrg (lbpp << 24) | ((height - 1) << 10) | 293801e04c3fSmrg pitch_tile_max); 293901e04c3fSmrg radeon_emit(cs, (slice_tile_max << 12) | (z << 0)); 294001e04c3fSmrg radeon_emit(cs, (x << 3) | (y << 17)); 294101e04c3fSmrg radeon_emit(cs, addr & 0xfffffffc); 294201e04c3fSmrg radeon_emit(cs, (addr >> 32UL) & 0xff); 2943af69d88dSmrg copy_height -= cheight; 2944af69d88dSmrg addr += cheight * pitch; 2945af69d88dSmrg y += cheight; 2946af69d88dSmrg } 2947af69d88dSmrg return TRUE; 29483464ebd5Sriastradh} 29493464ebd5Sriastradh 2950af69d88dSmrgstatic void r600_dma_copy(struct pipe_context *ctx, 2951af69d88dSmrg struct pipe_resource *dst, 2952af69d88dSmrg unsigned dst_level, 2953af69d88dSmrg unsigned dstx, unsigned dsty, unsigned dstz, 2954af69d88dSmrg struct pipe_resource *src, 2955af69d88dSmrg unsigned src_level, 2956af69d88dSmrg const struct pipe_box *src_box) 29573464ebd5Sriastradh{ 2958af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 2959af69d88dSmrg struct r600_texture *rsrc = (struct r600_texture*)src; 2960af69d88dSmrg struct r600_texture *rdst = (struct r600_texture*)dst; 2961af69d88dSmrg unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height; 2962af69d88dSmrg unsigned src_w, dst_w; 2963af69d88dSmrg unsigned src_x, src_y; 2964af69d88dSmrg unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; 2965af69d88dSmrg 29667ec681f3Smrg if (rctx->b.dma.cs.priv == NULL) { 2967af69d88dSmrg goto fallback; 2968af69d88dSmrg } 2969af69d88dSmrg 2970af69d88dSmrg if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { 2971af69d88dSmrg if (dst_x % 4 || src_box->x % 4 || src_box->width % 4) 2972af69d88dSmrg goto fallback; 2973af69d88dSmrg 2974af69d88dSmrg r600_dma_copy_buffer(rctx, dst, src, dst_x, src_box->x, src_box->width); 2975af69d88dSmrg return; 2976af69d88dSmrg } 2977af69d88dSmrg 297801e04c3fSmrg if (src_box->depth > 1 || 297901e04c3fSmrg !r600_prepare_for_dma_blit(&rctx->b, rdst, dst_level, dstx, dsty, 298001e04c3fSmrg dstz, rsrc, src_level, src_box)) 2981af69d88dSmrg goto fallback; 2982af69d88dSmrg 2983af69d88dSmrg src_x = util_format_get_nblocksx(src->format, src_box->x); 2984af69d88dSmrg dst_x = util_format_get_nblocksx(src->format, dst_x); 2985af69d88dSmrg src_y = util_format_get_nblocksy(src->format, src_box->y); 2986af69d88dSmrg dst_y = util_format_get_nblocksy(src->format, dst_y); 2987af69d88dSmrg 2988af69d88dSmrg bpp = rdst->surface.bpe; 298901e04c3fSmrg dst_pitch = rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.bpe; 299001e04c3fSmrg src_pitch = rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.bpe; 299101e04c3fSmrg src_w = u_minify(rsrc->resource.b.b.width0, src_level); 299201e04c3fSmrg dst_w = u_minify(rdst->resource.b.b.width0, dst_level); 2993af69d88dSmrg copy_height = src_box->height / rsrc->surface.blk_h; 2994af69d88dSmrg 299501e04c3fSmrg dst_mode = rdst->surface.u.legacy.level[dst_level].mode; 299601e04c3fSmrg src_mode = rsrc->surface.u.legacy.level[src_level].mode; 2997af69d88dSmrg 2998af69d88dSmrg if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { 2999af69d88dSmrg /* strict requirement on r6xx/r7xx */ 3000af69d88dSmrg goto fallback; 3001af69d88dSmrg } 3002af69d88dSmrg /* lot of constraint on alignment this should capture them all */ 3003af69d88dSmrg if (src_pitch % 8 || src_box->y % 8 || dst_y % 8) { 3004af69d88dSmrg goto fallback; 3005af69d88dSmrg } 3006af69d88dSmrg 3007af69d88dSmrg if (src_mode == dst_mode) { 3008af69d88dSmrg uint64_t dst_offset, src_offset, size; 3009af69d88dSmrg 3010af69d88dSmrg /* simple dma blit would do NOTE code here assume : 3011af69d88dSmrg * src_box.x/y == 0 3012af69d88dSmrg * dst_x/y == 0 3013af69d88dSmrg * dst_pitch == src_pitch 3014af69d88dSmrg */ 30157ec681f3Smrg src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; 301601e04c3fSmrg src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z; 3017af69d88dSmrg src_offset += src_y * src_pitch + src_x * bpp; 30187ec681f3Smrg dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; 301901e04c3fSmrg dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; 3020af69d88dSmrg dst_offset += dst_y * dst_pitch + dst_x * bpp; 3021af69d88dSmrg size = src_box->height * src_pitch; 3022af69d88dSmrg /* must be dw aligned */ 3023af69d88dSmrg if (dst_offset % 4 || src_offset % 4 || size % 4) { 3024af69d88dSmrg goto fallback; 3025af69d88dSmrg } 3026af69d88dSmrg r600_dma_copy_buffer(rctx, dst, src, dst_offset, src_offset, size); 3027af69d88dSmrg } else { 3028af69d88dSmrg if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z, 3029af69d88dSmrg src, src_level, src_x, src_y, src_box->z, 3030af69d88dSmrg copy_height, dst_pitch, bpp)) { 3031af69d88dSmrg goto fallback; 3032af69d88dSmrg } 3033af69d88dSmrg } 3034af69d88dSmrg return; 30353464ebd5Sriastradh 3036af69d88dSmrgfallback: 303701e04c3fSmrg r600_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, 3038af69d88dSmrg src, src_level, src_box); 30393464ebd5Sriastradh} 30403464ebd5Sriastradh 3041af69d88dSmrgvoid r600_init_state_functions(struct r600_context *rctx) 30423464ebd5Sriastradh{ 304301e04c3fSmrg unsigned id = 1; 304401e04c3fSmrg unsigned i; 3045af69d88dSmrg /* !!! 3046af69d88dSmrg * To avoid GPU lockup registers must be emited in a specific order 3047af69d88dSmrg * (no kidding ...). The order below is important and have been 3048af69d88dSmrg * partialy infered from analyzing fglrx command stream. 3049af69d88dSmrg * 3050af69d88dSmrg * Don't reorder atom without carefully checking the effect (GPU lockup 3051af69d88dSmrg * or piglit regression). 3052af69d88dSmrg * !!! 3053af69d88dSmrg */ 3054af69d88dSmrg 3055af69d88dSmrg r600_init_atom(rctx, &rctx->framebuffer.atom, id++, r600_emit_framebuffer_state, 0); 3056af69d88dSmrg 3057af69d88dSmrg /* shader const */ 3058af69d88dSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, r600_emit_vs_constant_buffers, 0); 3059af69d88dSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, r600_emit_gs_constant_buffers, 0); 3060af69d88dSmrg r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT].atom, id++, r600_emit_ps_constant_buffers, 0); 3061af69d88dSmrg 3062af69d88dSmrg /* sampler must be emited before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change 3063af69d88dSmrg * does not take effect (TA_CNTL_AUX emited by r600_emit_seamless_cube_map) 3064af69d88dSmrg */ 3065af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, r600_emit_vs_sampler_states, 0); 3066af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, r600_emit_gs_sampler_states, 0); 3067af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, r600_emit_ps_sampler_states, 0); 3068af69d88dSmrg /* resource */ 3069af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, r600_emit_vs_sampler_views, 0); 3070af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, r600_emit_gs_sampler_views, 0); 3071af69d88dSmrg r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, r600_emit_ps_sampler_views, 0); 3072af69d88dSmrg r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, r600_emit_vertex_buffers, 0); 3073af69d88dSmrg 307401e04c3fSmrg r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10); 3075af69d88dSmrg 3076af69d88dSmrg r600_init_atom(rctx, &rctx->seamless_cube_map.atom, id++, r600_emit_seamless_cube_map, 3); 3077af69d88dSmrg r600_init_atom(rctx, &rctx->sample_mask.atom, id++, r600_emit_sample_mask, 3); 3078af69d88dSmrg rctx->sample_mask.sample_mask = ~0; 3079af69d88dSmrg 3080af69d88dSmrg r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6); 3081af69d88dSmrg r600_init_atom(rctx, &rctx->blend_color.atom, id++, r600_emit_blend_color, 6); 3082af69d88dSmrg r600_init_atom(rctx, &rctx->blend_state.atom, id++, r600_emit_cso_state, 0); 3083af69d88dSmrg r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, r600_emit_cb_misc_state, 7); 3084af69d88dSmrg r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6); 3085af69d88dSmrg r600_init_atom(rctx, &rctx->clip_state.atom, id++, r600_emit_clip_state, 26); 3086af69d88dSmrg r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, r600_emit_db_misc_state, 7); 3087af69d88dSmrg r600_init_atom(rctx, &rctx->db_state.atom, id++, r600_emit_db_state, 11); 3088af69d88dSmrg r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0); 308901e04c3fSmrg r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 9); 3090af69d88dSmrg r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0); 309101e04c3fSmrg r600_add_atom(rctx, &rctx->b.scissors.atom, id++); 309201e04c3fSmrg r600_add_atom(rctx, &rctx->b.viewports.atom, id++); 3093af69d88dSmrg r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3); 3094af69d88dSmrg r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); 3095af69d88dSmrg r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5); 309601e04c3fSmrg r600_add_atom(rctx, &rctx->b.render_cond_atom, id++); 309701e04c3fSmrg r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++); 309801e04c3fSmrg r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++); 309901e04c3fSmrg for (i = 0; i < R600_NUM_HW_STAGES; i++) 310001e04c3fSmrg r600_init_atom(rctx, &rctx->hw_shader_stages[i].atom, id++, r600_emit_shader, 0); 3101af69d88dSmrg r600_init_atom(rctx, &rctx->shader_stages.atom, id++, r600_emit_shader_stages, 0); 3102af69d88dSmrg r600_init_atom(rctx, &rctx->gs_rings.atom, id++, r600_emit_gs_rings, 0); 3103af69d88dSmrg 3104af69d88dSmrg rctx->b.b.create_blend_state = r600_create_blend_state; 3105af69d88dSmrg rctx->b.b.create_depth_stencil_alpha_state = r600_create_dsa_state; 3106af69d88dSmrg rctx->b.b.create_rasterizer_state = r600_create_rs_state; 3107af69d88dSmrg rctx->b.b.create_sampler_state = r600_create_sampler_state; 3108af69d88dSmrg rctx->b.b.create_sampler_view = r600_create_sampler_view; 3109af69d88dSmrg rctx->b.b.set_framebuffer_state = r600_set_framebuffer_state; 3110af69d88dSmrg rctx->b.b.set_polygon_stipple = r600_set_polygon_stipple; 311101e04c3fSmrg rctx->b.b.set_min_samples = r600_set_min_samples; 3112af69d88dSmrg rctx->b.b.get_sample_position = r600_get_sample_position; 3113af69d88dSmrg rctx->b.dma_copy = r600_dma_copy; 31143464ebd5Sriastradh} 3115af69d88dSmrg/* this function must be last */ 3116