1b8e80941Smrg/* 2b8e80941Smrg * Copyright (c) 2014-2015 Etnaviv Project 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sub license, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the 12b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions 13b8e80941Smrg * of the Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Wladimir J. van der Laan <laanwj@gmail.com> 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "etnaviv_emit.h" 28b8e80941Smrg 29b8e80941Smrg#include "etnaviv_blend.h" 30b8e80941Smrg#include "etnaviv_compiler.h" 31b8e80941Smrg#include "etnaviv_context.h" 32b8e80941Smrg#include "etnaviv_rasterizer.h" 33b8e80941Smrg#include "etnaviv_resource.h" 34b8e80941Smrg#include "etnaviv_rs.h" 35b8e80941Smrg#include "etnaviv_screen.h" 36b8e80941Smrg#include "etnaviv_shader.h" 37b8e80941Smrg#include "etnaviv_texture.h" 38b8e80941Smrg#include "etnaviv_translate.h" 39b8e80941Smrg#include "etnaviv_uniforms.h" 40b8e80941Smrg#include "etnaviv_util.h" 41b8e80941Smrg#include "etnaviv_zsa.h" 42b8e80941Smrg#include "hw/common.xml.h" 43b8e80941Smrg#include "hw/state.xml.h" 44b8e80941Smrg#include "hw/state_blt.xml.h" 45b8e80941Smrg#include "util/u_math.h" 46b8e80941Smrg 47b8e80941Smrg/* Queue a STALL command (queues 2 words) */ 48b8e80941Smrgstatic inline void 49b8e80941SmrgCMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to) 50b8e80941Smrg{ 51b8e80941Smrg etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL); 52b8e80941Smrg etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to)); 53b8e80941Smrg} 54b8e80941Smrg 55b8e80941Smrgvoid 56b8e80941Smrgetna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to) 57b8e80941Smrg{ 58b8e80941Smrg bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT); 59b8e80941Smrg etna_cmd_stream_reserve(stream, blt ? 8 : 4); 60b8e80941Smrg 61b8e80941Smrg if (blt) { 62b8e80941Smrg etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0); 63b8e80941Smrg etna_cmd_stream_emit(stream, 1); 64b8e80941Smrg } 65b8e80941Smrg 66b8e80941Smrg /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */ 67b8e80941Smrg etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0); 68b8e80941Smrg etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to)); 69b8e80941Smrg 70b8e80941Smrg if (from == SYNC_RECIPIENT_FE) { 71b8e80941Smrg /* if the frontend is to be stalled, queue a STALL frontend command */ 72b8e80941Smrg CMD_STALL(stream, from, to); 73b8e80941Smrg } else { 74b8e80941Smrg /* otherwise, load the STALL token state */ 75b8e80941Smrg etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0); 76b8e80941Smrg etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to)); 77b8e80941Smrg } 78b8e80941Smrg 79b8e80941Smrg if (blt) { 80b8e80941Smrg etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0); 81b8e80941Smrg etna_cmd_stream_emit(stream, 0); 82b8e80941Smrg } 83b8e80941Smrg} 84b8e80941Smrg 85b8e80941Smrg#define EMIT_STATE(state_name, src_value) \ 86b8e80941Smrg etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value) 87b8e80941Smrg 88b8e80941Smrg#define EMIT_STATE_FIXP(state_name, src_value) \ 89b8e80941Smrg etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value) 90b8e80941Smrg 91b8e80941Smrg#define EMIT_STATE_RELOC(state_name, src_value) \ 92b8e80941Smrg etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value) 93b8e80941Smrg 94b8e80941Smrg#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */ 95b8e80941Smrg 96b8e80941Smrgstatic unsigned 97b8e80941Smrgrequired_stream_size(struct etna_context *ctx) 98b8e80941Smrg{ 99b8e80941Smrg unsigned size = ETNA_3D_CONTEXT_SIZE; 100b8e80941Smrg 101b8e80941Smrg /* stall + flush */ 102b8e80941Smrg size += 2 + 4; 103b8e80941Smrg 104b8e80941Smrg /* vertex elements */ 105b8e80941Smrg size += ctx->vertex_elements->num_elements + 1; 106b8e80941Smrg 107b8e80941Smrg /* uniforms - worst case (2 words per uniform load) */ 108b8e80941Smrg size += ctx->shader.vs->uniforms.const_count * 2; 109b8e80941Smrg size += ctx->shader.fs->uniforms.const_count * 2; 110b8e80941Smrg 111b8e80941Smrg /* shader */ 112b8e80941Smrg size += ctx->shader_state.vs_inst_mem_size + 1; 113b8e80941Smrg size += ctx->shader_state.ps_inst_mem_size + 1; 114b8e80941Smrg 115b8e80941Smrg /* DRAW_INDEXED_PRIMITIVES command */ 116b8e80941Smrg size += 6; 117b8e80941Smrg 118b8e80941Smrg /* reserve for alignment etc. */ 119b8e80941Smrg size += 64; 120b8e80941Smrg 121b8e80941Smrg return size; 122b8e80941Smrg} 123b8e80941Smrg 124b8e80941Smrg/* Emit state that only exists on HALTI5+ */ 125b8e80941Smrgstatic void 126b8e80941Smrgemit_halti5_only_state(struct etna_context *ctx, int vs_output_count) 127b8e80941Smrg{ 128b8e80941Smrg struct etna_cmd_stream *stream = ctx->stream; 129b8e80941Smrg uint32_t dirty = ctx->dirty; 130b8e80941Smrg struct etna_coalesce coalesce; 131b8e80941Smrg 132b8e80941Smrg etna_coalesce_start(stream, &coalesce); 133b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 134b8e80941Smrg /* Magic states (load balancing, inter-unit sync, buffers) */ 135b8e80941Smrg /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8)); 136b8e80941Smrg /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20)); 137b8e80941Smrg for (int x = 0; x < 4; ++x) { 138b8e80941Smrg /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]); 139b8e80941Smrg } 140b8e80941Smrg } 141b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { 142b8e80941Smrg for (int x = 0; x < 4; ++x) { 143b8e80941Smrg /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]); 144b8e80941Smrg } 145b8e80941Smrg } 146b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 147b8e80941Smrg /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS); 148b8e80941Smrg /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count); 149b8e80941Smrg /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS); 150b8e80941Smrg /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS); 151b8e80941Smrg } 152b8e80941Smrg etna_coalesce_end(stream, &coalesce); 153b8e80941Smrg} 154b8e80941Smrg 155b8e80941Smrg/* Emit state that no longer exists on HALTI5 */ 156b8e80941Smrgstatic void 157b8e80941Smrgemit_pre_halti5_state(struct etna_context *ctx) 158b8e80941Smrg{ 159b8e80941Smrg struct etna_cmd_stream *stream = ctx->stream; 160b8e80941Smrg uint32_t dirty = ctx->dirty; 161b8e80941Smrg struct etna_coalesce coalesce; 162b8e80941Smrg 163b8e80941Smrg etna_coalesce_start(stream, &coalesce); 164b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 165b8e80941Smrg /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC); 166b8e80941Smrg } 167b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 168b8e80941Smrg for (int x = 0; x < 4; ++x) { 169b8e80941Smrg /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]); 170b8e80941Smrg } 171b8e80941Smrg } 172b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { 173b8e80941Smrg for (int x = 0; x < 4; ++x) { 174b8e80941Smrg /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]); 175b8e80941Smrg } 176b8e80941Smrg } 177b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 178b8e80941Smrg /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC); 179b8e80941Smrg } 180b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 181b8e80941Smrg for (int x = 0; x < 10; ++x) { 182b8e80941Smrg /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]); 183b8e80941Smrg } 184b8e80941Smrg } 185b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) { 186b8e80941Smrg /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04); 187b8e80941Smrg for (int x = 0; x < 4; ++x) { 188b8e80941Smrg /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]); 189b8e80941Smrg } 190b8e80941Smrg for (int x = 0; x < 16; ++x) { 191b8e80941Smrg /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]); 192b8e80941Smrg } 193b8e80941Smrg } 194b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) { 195b8e80941Smrg /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC); 196b8e80941Smrg } 197b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) { 198b8e80941Smrg /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC); 199b8e80941Smrg } 200b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 201b8e80941Smrg /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS); 202b8e80941Smrg for (int x = 0; x < 2; ++x) { 203b8e80941Smrg /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]); 204b8e80941Smrg } 205b8e80941Smrg } 206b8e80941Smrg etna_coalesce_end(stream, &coalesce); 207b8e80941Smrg} 208b8e80941Smrg 209b8e80941Smrg/* Weave state before draw operation. This function merges all the compiled 210b8e80941Smrg * state blocks under the context into one device register state. Parts of 211b8e80941Smrg * this state that are changed since last call (dirty) will be uploaded as 212b8e80941Smrg * state changes in the command buffer. */ 213b8e80941Smrgvoid 214b8e80941Smrgetna_emit_state(struct etna_context *ctx) 215b8e80941Smrg{ 216b8e80941Smrg struct etna_cmd_stream *stream = ctx->stream; 217b8e80941Smrg 218b8e80941Smrg /* Pre-reserve the command buffer space which we are likely to need. 219b8e80941Smrg * This must cover all the state emitted below, and the following 220b8e80941Smrg * draw command. */ 221b8e80941Smrg etna_cmd_stream_reserve(stream, required_stream_size(ctx)); 222b8e80941Smrg 223b8e80941Smrg uint32_t dirty = ctx->dirty; 224b8e80941Smrg 225b8e80941Smrg /* Pre-processing: see what caches we need to flush before making state changes. */ 226b8e80941Smrg uint32_t to_flush = 0; 227b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_BLEND))) { 228b8e80941Smrg /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */ 229b8e80941Smrg#if 0 230b8e80941Smrg /* TODO*/ 231b8e80941Smrg if ((ctx->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) != 232b8e80941Smrg (etna_blend_state(ctx->blend)->PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE)) 233b8e80941Smrg#endif 234b8e80941Smrg to_flush |= VIVS_GL_FLUSH_CACHE_COLOR; 235b8e80941Smrg } 236b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES))) 237b8e80941Smrg to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE; 238b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */ 239b8e80941Smrg to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH; 240b8e80941Smrg if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL)) 241b8e80941Smrg to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH; 242b8e80941Smrg 243b8e80941Smrg if (to_flush) { 244b8e80941Smrg etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush); 245b8e80941Smrg etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); 246b8e80941Smrg } 247b8e80941Smrg 248b8e80941Smrg /* Flush TS cache before changing TS configuration. */ 249b8e80941Smrg if (unlikely(dirty & ETNA_DIRTY_TS)) { 250b8e80941Smrg etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH); 251b8e80941Smrg } 252b8e80941Smrg 253b8e80941Smrg /* Update vertex elements. This is different from any of the other states, in that 254b8e80941Smrg * a) the number of vertex elements written matters: so write only active ones 255b8e80941Smrg * b) the vertex element states must all be written: do not skip entries that stay the same */ 256b8e80941Smrg if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) { 257b8e80941Smrg if (ctx->specs.halti >= 5) { 258b8e80941Smrg /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0), 259b8e80941Smrg ctx->vertex_elements->num_elements, 260b8e80941Smrg ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0); 261b8e80941Smrg /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0), 262b8e80941Smrg ctx->vertex_elements->num_elements, 263b8e80941Smrg ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE); 264b8e80941Smrg /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0), 265b8e80941Smrg ctx->vertex_elements->num_elements, 266b8e80941Smrg ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1); 267b8e80941Smrg } else { 268b8e80941Smrg /* Special case: vertex elements must always be sent in full if changed */ 269b8e80941Smrg /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), 270b8e80941Smrg ctx->vertex_elements->num_elements, 271b8e80941Smrg ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG); 272b8e80941Smrg if (ctx->specs.halti >= 2) { 273b8e80941Smrg /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0), 274b8e80941Smrg ctx->vertex_elements->num_elements, 275b8e80941Smrg ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE); 276b8e80941Smrg } 277b8e80941Smrg } 278b8e80941Smrg } 279b8e80941Smrg unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex 280b8e80941Smrg ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE 281b8e80941Smrg : ctx->shader_state.VS_OUTPUT_COUNT; 282b8e80941Smrg 283b8e80941Smrg /* The following code is originally generated by gen_merge_state.py, to 284b8e80941Smrg * emit state in increasing order of address (this makes it possible to merge 285b8e80941Smrg * consecutive register updates into one SET_STATE command) 286b8e80941Smrg * 287b8e80941Smrg * There have been some manual changes, where the weaving operation is not 288b8e80941Smrg * simply bitwise or: 289b8e80941Smrg * - scissor fixp 290b8e80941Smrg * - num vertex elements 291b8e80941Smrg * - scissor handling 292b8e80941Smrg * - num samplers 293b8e80941Smrg * - texture lod 294b8e80941Smrg * - ETNA_DIRTY_TS 295b8e80941Smrg * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not 296b8e80941Smrg * change anyway 297b8e80941Smrg * - PS / framebuffer interaction for MSAA 298b8e80941Smrg * - move update of GL_MULTI_SAMPLE_CONFIG first 299b8e80941Smrg * - add unlikely()/likely() 300b8e80941Smrg */ 301b8e80941Smrg struct etna_coalesce coalesce; 302b8e80941Smrg 303b8e80941Smrg etna_coalesce_start(stream, &coalesce); 304b8e80941Smrg 305b8e80941Smrg /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here 306b8e80941Smrg * directly 307b8e80941Smrg * or indirectly */ 308b8e80941Smrg /* multi sample config is set first, and outside of the normal sorting 309b8e80941Smrg * order, as changing the multisample state clobbers PS.INPUT_COUNT (and 310b8e80941Smrg * possibly PS.TEMP_REGISTER_CONTROL). 311b8e80941Smrg */ 312b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) { 313b8e80941Smrg uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask); 314b8e80941Smrg val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG; 315b8e80941Smrg 316b8e80941Smrg /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val); 317b8e80941Smrg } 318b8e80941Smrg if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) { 319b8e80941Smrg /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR); 320b8e80941Smrg /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL); 321b8e80941Smrg } 322b8e80941Smrg if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) { 323b8e80941Smrg /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX); 324b8e80941Smrg } 325b8e80941Smrg if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) { 326b8e80941Smrg if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */ 327b8e80941Smrg for (int x = 0; x < ctx->vertex_buffer.count; ++x) { 328b8e80941Smrg /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR); 329b8e80941Smrg } 330b8e80941Smrg for (int x = 0; x < ctx->vertex_buffer.count; ++x) { 331b8e80941Smrg if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { 332b8e80941Smrg /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL); 333b8e80941Smrg } 334b8e80941Smrg } 335b8e80941Smrg for (int x = 0; x < ctx->vertex_buffer.count; ++x) { 336b8e80941Smrg if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { 337b8e80941Smrg /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_UNK14680); 338b8e80941Smrg } 339b8e80941Smrg } 340b8e80941Smrg } else if(ctx->specs.stream_count >= 1) { /* hw w/ multiple vertex streams */ 341b8e80941Smrg for (int x = 0; x < ctx->vertex_buffer.count; ++x) { 342b8e80941Smrg /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR); 343b8e80941Smrg } 344b8e80941Smrg for (int x = 0; x < ctx->vertex_buffer.count; ++x) { 345b8e80941Smrg if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { 346b8e80941Smrg /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL); 347b8e80941Smrg } 348b8e80941Smrg } 349b8e80941Smrg } else { /* hw w/ single vertex stream */ 350b8e80941Smrg /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR); 351b8e80941Smrg /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL); 352b8e80941Smrg } 353b8e80941Smrg } 354b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) { 355b8e80941Smrg 356b8e80941Smrg /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count); 357b8e80941Smrg } 358b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { 359b8e80941Smrg /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT); 360b8e80941Smrg /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL); 361b8e80941Smrg } 362b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 363b8e80941Smrg /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING); 364b8e80941Smrg } 365b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) { 366b8e80941Smrg /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X); 367b8e80941Smrg /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y); 368b8e80941Smrg /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z); 369b8e80941Smrg /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X); 370b8e80941Smrg /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y); 371b8e80941Smrg /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z); 372b8e80941Smrg } 373b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) { 374b8e80941Smrg struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer); 375b8e80941Smrg 376b8e80941Smrg /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH); 377b8e80941Smrg /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE); 378b8e80941Smrg /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE); 379b8e80941Smrg } 380b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 381b8e80941Smrg /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT); 382b8e80941Smrg } 383b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) { 384b8e80941Smrg uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG; 385b8e80941Smrg /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG); 386b8e80941Smrg } 387b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) { 388b8e80941Smrg struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer); 389b8e80941Smrg /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH); 390b8e80941Smrg /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH); 391b8e80941Smrg } 392b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER | 393b8e80941Smrg ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) { 394b8e80941Smrg /* this is a bit of a mess: rasterizer.scissor determines whether to use 395b8e80941Smrg * only the framebuffer scissor, or specific scissor state, and the 396b8e80941Smrg * viewport clips too so the logic spans four CSOs */ 397b8e80941Smrg struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer); 398b8e80941Smrg 399b8e80941Smrg uint32_t scissor_left = 400b8e80941Smrg MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT); 401b8e80941Smrg uint32_t scissor_top = 402b8e80941Smrg MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP); 403b8e80941Smrg uint32_t scissor_right = 404b8e80941Smrg MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT); 405b8e80941Smrg uint32_t scissor_bottom = 406b8e80941Smrg MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM); 407b8e80941Smrg 408b8e80941Smrg if (rasterizer->scissor) { 409b8e80941Smrg scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left); 410b8e80941Smrg scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top); 411b8e80941Smrg scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right); 412b8e80941Smrg scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom); 413b8e80941Smrg } 414b8e80941Smrg 415b8e80941Smrg /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left); 416b8e80941Smrg /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top); 417b8e80941Smrg /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right); 418b8e80941Smrg /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom); 419b8e80941Smrg } 420b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) { 421b8e80941Smrg struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer); 422b8e80941Smrg 423b8e80941Smrg /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE); 424b8e80941Smrg /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS); 425b8e80941Smrg /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG); 426b8e80941Smrg } 427b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER | 428b8e80941Smrg ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) { 429b8e80941Smrg struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer); 430b8e80941Smrg 431b8e80941Smrg uint32_t clip_right = 432b8e80941Smrg MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT); 433b8e80941Smrg uint32_t clip_bottom = 434b8e80941Smrg MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM); 435b8e80941Smrg 436b8e80941Smrg if (rasterizer->scissor) { 437b8e80941Smrg clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right); 438b8e80941Smrg clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom); 439b8e80941Smrg } 440b8e80941Smrg 441b8e80941Smrg /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right); 442b8e80941Smrg /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom); 443b8e80941Smrg } 444b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 445b8e80941Smrg /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL); 446b8e80941Smrg } 447b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) { 448b8e80941Smrg /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG); 449b8e80941Smrg /*01008*/ EMIT_STATE(PS_INPUT_COUNT, 450b8e80941Smrg ctx->framebuffer.msaa_mode 451b8e80941Smrg ? ctx->shader_state.PS_INPUT_COUNT_MSAA 452b8e80941Smrg : ctx->shader_state.PS_INPUT_COUNT); 453b8e80941Smrg /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL, 454b8e80941Smrg ctx->framebuffer.msaa_mode 455b8e80941Smrg ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA 456b8e80941Smrg : ctx->shader_state.PS_TEMP_REGISTER_CONTROL); 457b8e80941Smrg /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL); 458b8e80941Smrg } 459b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) { 460b8e80941Smrg uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG; 461b8e80941Smrg /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, val | ctx->framebuffer.PE_DEPTH_CONFIG); 462b8e80941Smrg } 463b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) { 464b8e80941Smrg /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR); 465b8e80941Smrg /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR); 466b8e80941Smrg } 467b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) { 468b8e80941Smrg /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE); 469b8e80941Smrg 470b8e80941Smrg if (ctx->specs.pixel_pipes == 1) { 471b8e80941Smrg /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR); 472b8e80941Smrg } 473b8e80941Smrg 474b8e80941Smrg /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE); 475b8e80941Smrg } 476b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_ZSA))) { 477b8e80941Smrg uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP; 478b8e80941Smrg /*01418*/ EMIT_STATE(PE_STENCIL_OP, val); 479b8e80941Smrg } 480b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF))) { 481b8e80941Smrg uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG; 482b8e80941Smrg /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG); 483b8e80941Smrg } 484b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_ZSA))) { 485b8e80941Smrg uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP; 486b8e80941Smrg /*01420*/ EMIT_STATE(PE_ALPHA_OP, val); 487b8e80941Smrg } 488b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) { 489b8e80941Smrg /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR); 490b8e80941Smrg } 491b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_BLEND))) { 492b8e80941Smrg uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG; 493b8e80941Smrg /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val); 494b8e80941Smrg } 495b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) { 496b8e80941Smrg uint32_t val; 497b8e80941Smrg /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT 498b8e80941Smrg * as a mask to enable the bits from blend PE_COLOR_FORMAT */ 499b8e80941Smrg val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK | 500b8e80941Smrg VIVS_PE_COLOR_FORMAT_OVERWRITE); 501b8e80941Smrg val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT; 502b8e80941Smrg val &= ctx->framebuffer.PE_COLOR_FORMAT; 503b8e80941Smrg /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val); 504b8e80941Smrg } 505b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) { 506b8e80941Smrg if (ctx->specs.pixel_pipes == 1) { 507b8e80941Smrg /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR); 508b8e80941Smrg /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE); 509b8e80941Smrg /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL); 510b8e80941Smrg } else if (ctx->specs.pixel_pipes == 2) { 511b8e80941Smrg /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE); 512b8e80941Smrg /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL); 513b8e80941Smrg /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]); 514b8e80941Smrg /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]); 515b8e80941Smrg /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]); 516b8e80941Smrg /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]); 517b8e80941Smrg } else { 518b8e80941Smrg abort(); 519b8e80941Smrg } 520b8e80941Smrg } 521b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF))) { 522b8e80941Smrg /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT); 523b8e80941Smrg } 524b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) { 525b8e80941Smrg struct etna_blend_state *blend = etna_blend_state(ctx->blend); 526b8e80941Smrg /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP); 527b8e80941Smrg } 528b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_BLEND))) { 529b8e80941Smrg struct etna_blend_state *blend = etna_blend_state(ctx->blend); 530b8e80941Smrg for (int x = 0; x < 2; ++x) { 531b8e80941Smrg /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]); 532b8e80941Smrg } 533b8e80941Smrg } 534b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) { 535b8e80941Smrg /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG); 536b8e80941Smrg /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE); 537b8e80941Smrg /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE); 538b8e80941Smrg /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE); 539b8e80941Smrg /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE); 540b8e80941Smrg /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE); 541b8e80941Smrg /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE); 542b8e80941Smrg } 543b8e80941Smrg if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { 544b8e80941Smrg /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS); 545b8e80941Smrg } 546b8e80941Smrg etna_coalesce_end(stream, &coalesce); 547b8e80941Smrg /* end only EMIT_STATE */ 548b8e80941Smrg 549b8e80941Smrg /* Emit strongly architecture-specific state */ 550b8e80941Smrg if (ctx->specs.halti >= 5) 551b8e80941Smrg emit_halti5_only_state(ctx, vs_output_count); 552b8e80941Smrg else 553b8e80941Smrg emit_pre_halti5_state(ctx); 554b8e80941Smrg 555b8e80941Smrg ctx->emit_texture_state(ctx); 556b8e80941Smrg 557b8e80941Smrg /* Insert a FE/PE stall as changing the shader instructions (and maybe 558b8e80941Smrg * the uniforms) can corrupt the previous in-progress draw operation. 559b8e80941Smrg * Observed with amoeba on GC2000 during the right-to-left rendering 560b8e80941Smrg * of PI, and can cause GPU hangs immediately after. 561b8e80941Smrg * I summise that this is because the "new" locations at 0xc000 are not 562b8e80941Smrg * properly protected against updates as other states seem to be. Hence, 563b8e80941Smrg * we detect the "new" vertex shader instruction offset to apply this. */ 564b8e80941Smrg if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000) 565b8e80941Smrg etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE); 566b8e80941Smrg 567b8e80941Smrg /* We need to update the uniform cache only if one of the following bits are 568b8e80941Smrg * set in ctx->dirty: 569b8e80941Smrg * - ETNA_DIRTY_SHADER 570b8e80941Smrg * - ETNA_DIRTY_CONSTBUF 571b8e80941Smrg * - uniforms_dirty_bits 572b8e80941Smrg * 573b8e80941Smrg * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In 574b8e80941Smrg * all 575b8e80941Smrg * other cases we can load on the changed uniforms. 576b8e80941Smrg */ 577b8e80941Smrg static const uint32_t uniform_dirty_bits = 578b8e80941Smrg ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF; 579b8e80941Smrg 580b8e80941Smrg if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits)) 581b8e80941Smrg etna_uniforms_write( 582b8e80941Smrg ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX], 583b8e80941Smrg ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size); 584b8e80941Smrg 585b8e80941Smrg if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits)) 586b8e80941Smrg etna_uniforms_write( 587b8e80941Smrg ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT], 588b8e80941Smrg ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size); 589b8e80941Smrg 590b8e80941Smrg /**** Large dynamically-sized state ****/ 591b8e80941Smrg bool do_uniform_flush = ctx->specs.halti < 5; 592b8e80941Smrg if (dirty & (ETNA_DIRTY_SHADER)) { 593b8e80941Smrg /* Special case: a new shader was loaded; simply re-load all uniforms and 594b8e80941Smrg * shader code at once */ 595b8e80941Smrg /* This sequence is special, do not change ordering unless necessary. According to comment 596b8e80941Smrg snippets in the Vivante kernel driver a process called "steering" goes on while programming 597b8e80941Smrg shader state. This (as I understand it) means certain unified states are "steered" 598b8e80941Smrg toward a specific shader unit (VS/PS/...) based on either explicit flags in register 599b8e80941Smrg 00860, or what other state is written before "auto-steering". So this means some 600b8e80941Smrg state can legitimately be programmed multiple times. 601b8e80941Smrg */ 602b8e80941Smrg 603b8e80941Smrg if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */ 604b8e80941Smrg assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo); 605b8e80941Smrg /* Set icache (VS) */ 606b8e80941Smrg etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0); 607b8e80941Smrg etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4); 608b8e80941Smrg assert(ctx->shader_state.VS_INST_ADDR.bo); 609b8e80941Smrg etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR); 610b8e80941Smrg etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002); 611b8e80941Smrg etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE); 612b8e80941Smrg etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1); 613b8e80941Smrg 614b8e80941Smrg /* Set icache (PS) */ 615b8e80941Smrg etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0); 616b8e80941Smrg etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4); 617b8e80941Smrg assert(ctx->shader_state.PS_INST_ADDR.bo); 618b8e80941Smrg etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR); 619b8e80941Smrg etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002); 620b8e80941Smrg etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE); 621b8e80941Smrg etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1); 622b8e80941Smrg 623b8e80941Smrg } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) { 624b8e80941Smrg /* ICACHE (pre-HALTI5) */ 625b8e80941Smrg assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers); 626b8e80941Smrg /* Set icache (VS) */ 627b8e80941Smrg etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); 628b8e80941Smrg etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, 629b8e80941Smrg VIVS_VS_ICACHE_CONTROL_ENABLE | 630b8e80941Smrg VIVS_VS_ICACHE_CONTROL_FLUSH_VS); 631b8e80941Smrg assert(ctx->shader_state.VS_INST_ADDR.bo); 632b8e80941Smrg etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR); 633b8e80941Smrg 634b8e80941Smrg /* Set icache (PS) */ 635b8e80941Smrg etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16); 636b8e80941Smrg etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, 637b8e80941Smrg VIVS_VS_ICACHE_CONTROL_ENABLE | 638b8e80941Smrg VIVS_VS_ICACHE_CONTROL_FLUSH_PS); 639b8e80941Smrg assert(ctx->shader_state.PS_INST_ADDR.bo); 640b8e80941Smrg etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR); 641b8e80941Smrg } else { 642b8e80941Smrg /* Upload shader directly, first flushing and disabling icache if 643b8e80941Smrg * supported on this hw */ 644b8e80941Smrg if (ctx->specs.has_icache) { 645b8e80941Smrg etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, 646b8e80941Smrg VIVS_VS_ICACHE_CONTROL_FLUSH_PS | 647b8e80941Smrg VIVS_VS_ICACHE_CONTROL_FLUSH_VS); 648b8e80941Smrg } 649b8e80941Smrg if (ctx->specs.has_shader_range_registers) { 650b8e80941Smrg etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); 651b8e80941Smrg etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) | 652b8e80941Smrg 0x100); 653b8e80941Smrg } 654b8e80941Smrg etna_set_state_multi(stream, ctx->specs.vs_offset, 655b8e80941Smrg ctx->shader_state.vs_inst_mem_size, 656b8e80941Smrg ctx->shader_state.VS_INST_MEM); 657b8e80941Smrg etna_set_state_multi(stream, ctx->specs.ps_offset, 658b8e80941Smrg ctx->shader_state.ps_inst_mem_size, 659b8e80941Smrg ctx->shader_state.PS_INST_MEM); 660b8e80941Smrg } 661b8e80941Smrg 662b8e80941Smrg if (ctx->specs.has_unified_uniforms) { 663b8e80941Smrg etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0); 664b8e80941Smrg etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms); 665b8e80941Smrg } 666b8e80941Smrg 667b8e80941Smrg if (do_uniform_flush) 668b8e80941Smrg etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); 669b8e80941Smrg etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset, 670b8e80941Smrg ctx->shader_state.vs_uniforms_size, 671b8e80941Smrg ctx->shader_state.VS_UNIFORMS); 672b8e80941Smrg if (do_uniform_flush) 673b8e80941Smrg etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); 674b8e80941Smrg etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset, 675b8e80941Smrg ctx->shader_state.ps_uniforms_size, 676b8e80941Smrg ctx->shader_state.PS_UNIFORMS); 677b8e80941Smrg 678b8e80941Smrg /* Copy uniforms to gpu3d, so that incremental updates to uniforms are 679b8e80941Smrg * possible as long as the 680b8e80941Smrg * same shader remains bound */ 681b8e80941Smrg memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS, 682b8e80941Smrg ctx->shader_state.vs_uniforms_size * 4); 683b8e80941Smrg memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS, 684b8e80941Smrg ctx->shader_state.ps_uniforms_size * 4); 685b8e80941Smrg 686b8e80941Smrg if (ctx->specs.halti >= 5) { 687b8e80941Smrg /* HALTI5 needs to be prompted to pre-fetch shaders */ 688b8e80941Smrg etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000); 689b8e80941Smrg etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000); 690b8e80941Smrg etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); 691b8e80941Smrg } 692b8e80941Smrg } else { 693b8e80941Smrg /* ideally this cache would only be flushed if there are VS uniform changes */ 694b8e80941Smrg if (do_uniform_flush) 695b8e80941Smrg etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); 696b8e80941Smrg etna_coalesce_start(stream, &coalesce); 697b8e80941Smrg for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) { 698b8e80941Smrg if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) { 699b8e80941Smrg etna_coalsence_emit(stream, &coalesce, ctx->specs.vs_uniforms_offset + x*4, ctx->shader_state.VS_UNIFORMS[x]); 700b8e80941Smrg ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x]; 701b8e80941Smrg } 702b8e80941Smrg } 703b8e80941Smrg etna_coalesce_end(stream, &coalesce); 704b8e80941Smrg 705b8e80941Smrg /* ideally this cache would only be flushed if there are PS uniform changes */ 706b8e80941Smrg if (do_uniform_flush) 707b8e80941Smrg etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); 708b8e80941Smrg etna_coalesce_start(stream, &coalesce); 709b8e80941Smrg for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) { 710b8e80941Smrg if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) { 711b8e80941Smrg etna_coalsence_emit(stream, &coalesce, ctx->specs.ps_uniforms_offset + x*4, ctx->shader_state.PS_UNIFORMS[x]); 712b8e80941Smrg ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x]; 713b8e80941Smrg } 714b8e80941Smrg } 715b8e80941Smrg etna_coalesce_end(stream, &coalesce); 716b8e80941Smrg } 717b8e80941Smrg/**** End of state update ****/ 718b8e80941Smrg#undef EMIT_STATE 719b8e80941Smrg#undef EMIT_STATE_FIXP 720b8e80941Smrg#undef EMIT_STATE_RELOC 721b8e80941Smrg ctx->dirty = 0; 722b8e80941Smrg ctx->dirty_sampler_views = 0; 723b8e80941Smrg} 724