1b8e80941Smrg/*
2b8e80941Smrg * Copyright (c) 2014-2015 Etnaviv Project
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sub license,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the
12b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions
13b8e80941Smrg * of the Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21b8e80941Smrg * DEALINGS IN THE SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg * Authors:
24b8e80941Smrg *    Wladimir J. van der Laan <laanwj@gmail.com>
25b8e80941Smrg */
26b8e80941Smrg
27b8e80941Smrg#include "etnaviv_emit.h"
28b8e80941Smrg
29b8e80941Smrg#include "etnaviv_blend.h"
30b8e80941Smrg#include "etnaviv_compiler.h"
31b8e80941Smrg#include "etnaviv_context.h"
32b8e80941Smrg#include "etnaviv_rasterizer.h"
33b8e80941Smrg#include "etnaviv_resource.h"
34b8e80941Smrg#include "etnaviv_rs.h"
35b8e80941Smrg#include "etnaviv_screen.h"
36b8e80941Smrg#include "etnaviv_shader.h"
37b8e80941Smrg#include "etnaviv_texture.h"
38b8e80941Smrg#include "etnaviv_translate.h"
39b8e80941Smrg#include "etnaviv_uniforms.h"
40b8e80941Smrg#include "etnaviv_util.h"
41b8e80941Smrg#include "etnaviv_zsa.h"
42b8e80941Smrg#include "hw/common.xml.h"
43b8e80941Smrg#include "hw/state.xml.h"
44b8e80941Smrg#include "hw/state_blt.xml.h"
45b8e80941Smrg#include "util/u_math.h"
46b8e80941Smrg
47b8e80941Smrg/* Queue a STALL command (queues 2 words) */
48b8e80941Smrgstatic inline void
49b8e80941SmrgCMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
50b8e80941Smrg{
51b8e80941Smrg   etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
52b8e80941Smrg   etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
53b8e80941Smrg}
54b8e80941Smrg
55b8e80941Smrgvoid
56b8e80941Smrgetna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
57b8e80941Smrg{
58b8e80941Smrg   bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
59b8e80941Smrg   etna_cmd_stream_reserve(stream, blt ? 8 : 4);
60b8e80941Smrg
61b8e80941Smrg   if (blt) {
62b8e80941Smrg      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
63b8e80941Smrg      etna_cmd_stream_emit(stream, 1);
64b8e80941Smrg   }
65b8e80941Smrg
66b8e80941Smrg   /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67b8e80941Smrg   etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
68b8e80941Smrg   etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));
69b8e80941Smrg
70b8e80941Smrg   if (from == SYNC_RECIPIENT_FE) {
71b8e80941Smrg      /* if the frontend is to be stalled, queue a STALL frontend command */
72b8e80941Smrg      CMD_STALL(stream, from, to);
73b8e80941Smrg   } else {
74b8e80941Smrg      /* otherwise, load the STALL token state */
75b8e80941Smrg      etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
76b8e80941Smrg      etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
77b8e80941Smrg   }
78b8e80941Smrg
79b8e80941Smrg   if (blt) {
80b8e80941Smrg      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
81b8e80941Smrg      etna_cmd_stream_emit(stream, 0);
82b8e80941Smrg   }
83b8e80941Smrg}
84b8e80941Smrg
/* Shorthands for the coalescing state emitters. Each expands to a call of the
 * matching etna_coalsence_emit*() function and relies on two locals being in
 * scope at the point of use: `stream` and `coalesce`. `reg` is the state name
 * without its VIVS_ prefix; the prefix is pasted on here. */
#define EMIT_STATE(reg, value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##reg, value)

#define EMIT_STATE_FIXP(reg, value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##reg, value)

#define EMIT_STATE_RELOC(reg, value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##reg, value)

/* Base amount added to every reservation computed by required_stream_size().
 * Keep this number above "Total state updates (fixed)" from the
 * gen_weave_state tool. */
#define ETNA_3D_CONTEXT_SIZE  (400)
95b8e80941Smrg
96b8e80941Smrgstatic unsigned
97b8e80941Smrgrequired_stream_size(struct etna_context *ctx)
98b8e80941Smrg{
99b8e80941Smrg   unsigned size = ETNA_3D_CONTEXT_SIZE;
100b8e80941Smrg
101b8e80941Smrg   /* stall + flush */
102b8e80941Smrg   size += 2 + 4;
103b8e80941Smrg
104b8e80941Smrg   /* vertex elements */
105b8e80941Smrg   size += ctx->vertex_elements->num_elements + 1;
106b8e80941Smrg
107b8e80941Smrg   /* uniforms - worst case (2 words per uniform load) */
108b8e80941Smrg   size += ctx->shader.vs->uniforms.const_count * 2;
109b8e80941Smrg   size += ctx->shader.fs->uniforms.const_count * 2;
110b8e80941Smrg
111b8e80941Smrg   /* shader */
112b8e80941Smrg   size += ctx->shader_state.vs_inst_mem_size + 1;
113b8e80941Smrg   size += ctx->shader_state.ps_inst_mem_size + 1;
114b8e80941Smrg
115b8e80941Smrg   /* DRAW_INDEXED_PRIMITIVES command */
116b8e80941Smrg   size += 6;
117b8e80941Smrg
118b8e80941Smrg   /* reserve for alignment etc. */
119b8e80941Smrg   size += 64;
120b8e80941Smrg
121b8e80941Smrg   return size;
122b8e80941Smrg}
123b8e80941Smrg
124b8e80941Smrg/* Emit state that only exists on HALTI5+ */
125b8e80941Smrgstatic void
126b8e80941Smrgemit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
127b8e80941Smrg{
128b8e80941Smrg   struct etna_cmd_stream *stream = ctx->stream;
129b8e80941Smrg   uint32_t dirty = ctx->dirty;
130b8e80941Smrg   struct etna_coalesce coalesce;
131b8e80941Smrg
132b8e80941Smrg   etna_coalesce_start(stream, &coalesce);
133b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
134b8e80941Smrg      /* Magic states (load balancing, inter-unit sync, buffers) */
135b8e80941Smrg      /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
136b8e80941Smrg      /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
137b8e80941Smrg      for (int x = 0; x < 4; ++x) {
138b8e80941Smrg         /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
139b8e80941Smrg      }
140b8e80941Smrg   }
141b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
142b8e80941Smrg      for (int x = 0; x < 4; ++x) {
143b8e80941Smrg         /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
144b8e80941Smrg      }
145b8e80941Smrg   }
146b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
147b8e80941Smrg      /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
148b8e80941Smrg      /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
149b8e80941Smrg      /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
150b8e80941Smrg      /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
151b8e80941Smrg   }
152b8e80941Smrg   etna_coalesce_end(stream, &coalesce);
153b8e80941Smrg}
154b8e80941Smrg
155b8e80941Smrg/* Emit state that no longer exists on HALTI5 */
156b8e80941Smrgstatic void
157b8e80941Smrgemit_pre_halti5_state(struct etna_context *ctx)
158b8e80941Smrg{
159b8e80941Smrg   struct etna_cmd_stream *stream = ctx->stream;
160b8e80941Smrg   uint32_t dirty = ctx->dirty;
161b8e80941Smrg   struct etna_coalesce coalesce;
162b8e80941Smrg
163b8e80941Smrg   etna_coalesce_start(stream, &coalesce);
164b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
165b8e80941Smrg      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
166b8e80941Smrg   }
167b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
168b8e80941Smrg      for (int x = 0; x < 4; ++x) {
169b8e80941Smrg        /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
170b8e80941Smrg      }
171b8e80941Smrg   }
172b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
173b8e80941Smrg      for (int x = 0; x < 4; ++x) {
174b8e80941Smrg        /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
175b8e80941Smrg      }
176b8e80941Smrg   }
177b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
178b8e80941Smrg      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
179b8e80941Smrg   }
180b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
181b8e80941Smrg      for (int x = 0; x < 10; ++x) {
182b8e80941Smrg         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
183b8e80941Smrg      }
184b8e80941Smrg   }
185b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
186b8e80941Smrg      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
187b8e80941Smrg      for (int x = 0; x < 4; ++x) {
188b8e80941Smrg         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
189b8e80941Smrg      }
190b8e80941Smrg      for (int x = 0; x < 16; ++x) {
191b8e80941Smrg         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
192b8e80941Smrg      }
193b8e80941Smrg   }
194b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
195b8e80941Smrg      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
196b8e80941Smrg   }
197b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
198b8e80941Smrg      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
199b8e80941Smrg   }
200b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
201b8e80941Smrg      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
202b8e80941Smrg      for (int x = 0; x < 2; ++x) {
203b8e80941Smrg         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
204b8e80941Smrg      }
205b8e80941Smrg   }
206b8e80941Smrg   etna_coalesce_end(stream, &coalesce);
207b8e80941Smrg}
208b8e80941Smrg
209b8e80941Smrg/* Weave state before draw operation. This function merges all the compiled
210b8e80941Smrg * state blocks under the context into one device register state. Parts of
211b8e80941Smrg * this state that are changed since last call (dirty) will be uploaded as
212b8e80941Smrg * state changes in the command buffer. */
213b8e80941Smrgvoid
214b8e80941Smrgetna_emit_state(struct etna_context *ctx)
215b8e80941Smrg{
216b8e80941Smrg   struct etna_cmd_stream *stream = ctx->stream;
217b8e80941Smrg
218b8e80941Smrg   /* Pre-reserve the command buffer space which we are likely to need.
219b8e80941Smrg    * This must cover all the state emitted below, and the following
220b8e80941Smrg    * draw command. */
221b8e80941Smrg   etna_cmd_stream_reserve(stream, required_stream_size(ctx));
222b8e80941Smrg
223b8e80941Smrg   uint32_t dirty = ctx->dirty;
224b8e80941Smrg
225b8e80941Smrg   /* Pre-processing: see what caches we need to flush before making state changes. */
226b8e80941Smrg   uint32_t to_flush = 0;
227b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
228b8e80941Smrg      /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */
229b8e80941Smrg#if 0
230b8e80941Smrg        /* TODO*/
231b8e80941Smrg        if ((ctx->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) !=
232b8e80941Smrg           (etna_blend_state(ctx->blend)->PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE))
233b8e80941Smrg#endif
234b8e80941Smrg      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
235b8e80941Smrg   }
236b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
237b8e80941Smrg      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
238b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
239b8e80941Smrg      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
240b8e80941Smrg   if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
241b8e80941Smrg      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
242b8e80941Smrg
243b8e80941Smrg   if (to_flush) {
244b8e80941Smrg      etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
245b8e80941Smrg      etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
246b8e80941Smrg   }
247b8e80941Smrg
248b8e80941Smrg   /* Flush TS cache before changing TS configuration. */
249b8e80941Smrg   if (unlikely(dirty & ETNA_DIRTY_TS)) {
250b8e80941Smrg      etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
251b8e80941Smrg   }
252b8e80941Smrg
253b8e80941Smrg   /* Update vertex elements. This is different from any of the other states, in that
254b8e80941Smrg    * a) the number of vertex elements written matters: so write only active ones
255b8e80941Smrg    * b) the vertex element states must all be written: do not skip entries that stay the same */
256b8e80941Smrg   if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
257b8e80941Smrg      if (ctx->specs.halti >= 5) {
258b8e80941Smrg         /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
259b8e80941Smrg            ctx->vertex_elements->num_elements,
260b8e80941Smrg            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
261b8e80941Smrg         /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
262b8e80941Smrg            ctx->vertex_elements->num_elements,
263b8e80941Smrg            ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
264b8e80941Smrg         /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
265b8e80941Smrg            ctx->vertex_elements->num_elements,
266b8e80941Smrg            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
267b8e80941Smrg      } else {
268b8e80941Smrg         /* Special case: vertex elements must always be sent in full if changed */
269b8e80941Smrg         /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
270b8e80941Smrg            ctx->vertex_elements->num_elements,
271b8e80941Smrg            ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
272b8e80941Smrg         if (ctx->specs.halti >= 2) {
273b8e80941Smrg            /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
274b8e80941Smrg               ctx->vertex_elements->num_elements,
275b8e80941Smrg               ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
276b8e80941Smrg         }
277b8e80941Smrg      }
278b8e80941Smrg   }
279b8e80941Smrg   unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
280b8e80941Smrg                           ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
281b8e80941Smrg                           : ctx->shader_state.VS_OUTPUT_COUNT;
282b8e80941Smrg
283b8e80941Smrg   /* The following code is originally generated by gen_merge_state.py, to
284b8e80941Smrg    * emit state in increasing order of address (this makes it possible to merge
285b8e80941Smrg    * consecutive register updates into one SET_STATE command)
286b8e80941Smrg    *
287b8e80941Smrg    * There have been some manual changes, where the weaving operation is not
288b8e80941Smrg    * simply bitwise or:
289b8e80941Smrg    * - scissor fixp
290b8e80941Smrg    * - num vertex elements
291b8e80941Smrg    * - scissor handling
292b8e80941Smrg    * - num samplers
293b8e80941Smrg    * - texture lod
294b8e80941Smrg    * - ETNA_DIRTY_TS
295b8e80941Smrg    * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
296b8e80941Smrg    * change anyway
297b8e80941Smrg    * - PS / framebuffer interaction for MSAA
298b8e80941Smrg    * - move update of GL_MULTI_SAMPLE_CONFIG first
299b8e80941Smrg    * - add unlikely()/likely()
300b8e80941Smrg    */
301b8e80941Smrg   struct etna_coalesce coalesce;
302b8e80941Smrg
303b8e80941Smrg   etna_coalesce_start(stream, &coalesce);
304b8e80941Smrg
305b8e80941Smrg   /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
306b8e80941Smrg    * directly
307b8e80941Smrg    *    or indirectly */
308b8e80941Smrg   /* multi sample config is set first, and outside of the normal sorting
309b8e80941Smrg    * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
310b8e80941Smrg    * possibly PS.TEMP_REGISTER_CONTROL).
311b8e80941Smrg    */
312b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
313b8e80941Smrg      uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
314b8e80941Smrg      val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;
315b8e80941Smrg
316b8e80941Smrg      /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
317b8e80941Smrg   }
318b8e80941Smrg   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
319b8e80941Smrg      /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
320b8e80941Smrg      /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
321b8e80941Smrg   }
322b8e80941Smrg   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
323b8e80941Smrg      /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
324b8e80941Smrg   }
325b8e80941Smrg   if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
326b8e80941Smrg      if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
327b8e80941Smrg         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
328b8e80941Smrg            /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
329b8e80941Smrg         }
330b8e80941Smrg         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
331b8e80941Smrg            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
332b8e80941Smrg               /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
333b8e80941Smrg            }
334b8e80941Smrg         }
335b8e80941Smrg         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
336b8e80941Smrg            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
337b8e80941Smrg               /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_UNK14680);
338b8e80941Smrg            }
339b8e80941Smrg         }
340b8e80941Smrg      } else if(ctx->specs.stream_count >= 1) { /* hw w/ multiple vertex streams */
341b8e80941Smrg         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
342b8e80941Smrg            /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
343b8e80941Smrg         }
344b8e80941Smrg         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
345b8e80941Smrg            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
346b8e80941Smrg               /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
347b8e80941Smrg            }
348b8e80941Smrg         }
349b8e80941Smrg      } else { /* hw w/ single vertex stream */
350b8e80941Smrg         /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
351b8e80941Smrg         /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
352b8e80941Smrg      }
353b8e80941Smrg   }
354b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
355b8e80941Smrg
356b8e80941Smrg      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
357b8e80941Smrg   }
358b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
359b8e80941Smrg      /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
360b8e80941Smrg      /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
361b8e80941Smrg   }
362b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
363b8e80941Smrg      /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
364b8e80941Smrg   }
365b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
366b8e80941Smrg      /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
367b8e80941Smrg      /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
368b8e80941Smrg      /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
369b8e80941Smrg      /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
370b8e80941Smrg      /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
371b8e80941Smrg      /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
372b8e80941Smrg   }
373b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
374b8e80941Smrg      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
375b8e80941Smrg
376b8e80941Smrg      /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
377b8e80941Smrg      /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
378b8e80941Smrg      /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
379b8e80941Smrg   }
380b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
381b8e80941Smrg      /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
382b8e80941Smrg   }
383b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
384b8e80941Smrg      uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
385b8e80941Smrg      /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
386b8e80941Smrg   }
387b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
388b8e80941Smrg      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
389b8e80941Smrg      /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
390b8e80941Smrg      /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
391b8e80941Smrg   }
392b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
393b8e80941Smrg                         ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
394b8e80941Smrg      /* this is a bit of a mess: rasterizer.scissor determines whether to use
395b8e80941Smrg       * only the framebuffer scissor, or specific scissor state, and the
396b8e80941Smrg       * viewport clips too so the logic spans four CSOs */
397b8e80941Smrg      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
398b8e80941Smrg
399b8e80941Smrg      uint32_t scissor_left =
400b8e80941Smrg         MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT);
401b8e80941Smrg      uint32_t scissor_top =
402b8e80941Smrg         MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP);
403b8e80941Smrg      uint32_t scissor_right =
404b8e80941Smrg         MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT);
405b8e80941Smrg      uint32_t scissor_bottom =
406b8e80941Smrg         MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM);
407b8e80941Smrg
408b8e80941Smrg      if (rasterizer->scissor) {
409b8e80941Smrg         scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left);
410b8e80941Smrg         scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top);
411b8e80941Smrg         scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right);
412b8e80941Smrg         scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
413b8e80941Smrg      }
414b8e80941Smrg
415b8e80941Smrg      /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left);
416b8e80941Smrg      /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top);
417b8e80941Smrg      /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right);
418b8e80941Smrg      /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom);
419b8e80941Smrg   }
420b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
421b8e80941Smrg      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
422b8e80941Smrg
423b8e80941Smrg      /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
424b8e80941Smrg      /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
425b8e80941Smrg      /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
426b8e80941Smrg   }
427b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
428b8e80941Smrg                         ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
429b8e80941Smrg      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
430b8e80941Smrg
431b8e80941Smrg      uint32_t clip_right =
432b8e80941Smrg         MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT);
433b8e80941Smrg      uint32_t clip_bottom =
434b8e80941Smrg         MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM);
435b8e80941Smrg
436b8e80941Smrg      if (rasterizer->scissor) {
437b8e80941Smrg         clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right);
438b8e80941Smrg         clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom);
439b8e80941Smrg      }
440b8e80941Smrg
441b8e80941Smrg      /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right);
442b8e80941Smrg      /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom);
443b8e80941Smrg   }
444b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
445b8e80941Smrg      /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
446b8e80941Smrg   }
447b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
448b8e80941Smrg      /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
449b8e80941Smrg      /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
450b8e80941Smrg                           ctx->framebuffer.msaa_mode
451b8e80941Smrg                              ? ctx->shader_state.PS_INPUT_COUNT_MSAA
452b8e80941Smrg                              : ctx->shader_state.PS_INPUT_COUNT);
453b8e80941Smrg      /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
454b8e80941Smrg                           ctx->framebuffer.msaa_mode
455b8e80941Smrg                              ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
456b8e80941Smrg                              : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
457b8e80941Smrg      /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
458b8e80941Smrg   }
459b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
460b8e80941Smrg      uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
461b8e80941Smrg      /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, val | ctx->framebuffer.PE_DEPTH_CONFIG);
462b8e80941Smrg   }
463b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
464b8e80941Smrg      /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
465b8e80941Smrg      /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
466b8e80941Smrg   }
467b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
468b8e80941Smrg      /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);
469b8e80941Smrg
470b8e80941Smrg      if (ctx->specs.pixel_pipes == 1) {
471b8e80941Smrg         /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
472b8e80941Smrg      }
473b8e80941Smrg
474b8e80941Smrg      /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
475b8e80941Smrg   }
476b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
477b8e80941Smrg      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP;
478b8e80941Smrg      /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
479b8e80941Smrg   }
480b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF))) {
481b8e80941Smrg      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG;
482b8e80941Smrg      /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG);
483b8e80941Smrg   }
484b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
485b8e80941Smrg      uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
486b8e80941Smrg      /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
487b8e80941Smrg   }
488b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
489b8e80941Smrg      /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
490b8e80941Smrg   }
491b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
492b8e80941Smrg      uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
493b8e80941Smrg      /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
494b8e80941Smrg   }
495b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
496b8e80941Smrg      uint32_t val;
497b8e80941Smrg      /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
498b8e80941Smrg       * as a mask to enable the bits from blend PE_COLOR_FORMAT */
499b8e80941Smrg      val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
500b8e80941Smrg              VIVS_PE_COLOR_FORMAT_OVERWRITE);
501b8e80941Smrg      val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
502b8e80941Smrg      val &= ctx->framebuffer.PE_COLOR_FORMAT;
503b8e80941Smrg      /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
504b8e80941Smrg   }
505b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
506b8e80941Smrg      if (ctx->specs.pixel_pipes == 1) {
507b8e80941Smrg         /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
508b8e80941Smrg         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
509b8e80941Smrg         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
510b8e80941Smrg      } else if (ctx->specs.pixel_pipes == 2) {
511b8e80941Smrg         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
512b8e80941Smrg         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
513b8e80941Smrg         /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
514b8e80941Smrg         /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
515b8e80941Smrg         /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
516b8e80941Smrg         /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
517b8e80941Smrg      } else {
518b8e80941Smrg         abort();
519b8e80941Smrg      }
520b8e80941Smrg   }
521b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF))) {
522b8e80941Smrg      /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT);
523b8e80941Smrg   }
524b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
525b8e80941Smrg      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
526b8e80941Smrg      /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
527b8e80941Smrg   }
528b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
529b8e80941Smrg      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
530b8e80941Smrg      for (int x = 0; x < 2; ++x) {
531b8e80941Smrg         /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
532b8e80941Smrg      }
533b8e80941Smrg   }
534b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
535b8e80941Smrg      /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
536b8e80941Smrg      /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
537b8e80941Smrg      /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
538b8e80941Smrg      /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
539b8e80941Smrg      /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
540b8e80941Smrg      /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
541b8e80941Smrg      /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
542b8e80941Smrg   }
543b8e80941Smrg   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
544b8e80941Smrg      /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
545b8e80941Smrg   }
546b8e80941Smrg   etna_coalesce_end(stream, &coalesce);
547b8e80941Smrg   /* end only EMIT_STATE */
548b8e80941Smrg
549b8e80941Smrg   /* Emit strongly architecture-specific state */
550b8e80941Smrg   if (ctx->specs.halti >= 5)
551b8e80941Smrg      emit_halti5_only_state(ctx, vs_output_count);
552b8e80941Smrg   else
553b8e80941Smrg      emit_pre_halti5_state(ctx);
554b8e80941Smrg
555b8e80941Smrg   ctx->emit_texture_state(ctx);
556b8e80941Smrg
557b8e80941Smrg   /* Insert a FE/PE stall as changing the shader instructions (and maybe
558b8e80941Smrg    * the uniforms) can corrupt the previous in-progress draw operation.
559b8e80941Smrg    * Observed with amoeba on GC2000 during the right-to-left rendering
560b8e80941Smrg    * of PI, and can cause GPU hangs immediately after.
561b8e80941Smrg    * I surmise that this is because the "new" locations at 0xc000 are not
562b8e80941Smrg    * properly protected against updates as other states seem to be. Hence,
563b8e80941Smrg    * we detect the "new" vertex shader instruction offset to apply this. */
564b8e80941Smrg   if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000)
565b8e80941Smrg      etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
566b8e80941Smrg
567b8e80941Smrg   /* We need to update the uniform cache only if one of the following bits is
568b8e80941Smrg    * set in ctx->dirty:
569b8e80941Smrg    * - ETNA_DIRTY_SHADER
570b8e80941Smrg    * - ETNA_DIRTY_CONSTBUF
571b8e80941Smrg    * - uniforms_dirty_bits
572b8e80941Smrg    *
573b8e80941Smrg    * In case of ETNA_DIRTY_SHADER we need to load all uniforms from the
574b8e80941Smrg    * cache. In all other cases we only need to load the uniforms that
575b8e80941Smrg    * have changed.
576b8e80941Smrg    */
577b8e80941Smrg   static const uint32_t uniform_dirty_bits =
578b8e80941Smrg      ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;
579b8e80941Smrg
580b8e80941Smrg   if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
581b8e80941Smrg      etna_uniforms_write(
582b8e80941Smrg         ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX],
583b8e80941Smrg         ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size);
584b8e80941Smrg
585b8e80941Smrg   if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
586b8e80941Smrg      etna_uniforms_write(
587b8e80941Smrg         ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT],
588b8e80941Smrg         ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
589b8e80941Smrg
590b8e80941Smrg   /**** Large dynamically-sized state ****/
591b8e80941Smrg   bool do_uniform_flush = ctx->specs.halti < 5;
592b8e80941Smrg   if (dirty & (ETNA_DIRTY_SHADER)) {
593b8e80941Smrg      /* Special case: a new shader was loaded; simply re-load all uniforms and
594b8e80941Smrg       * shader code at once */
595b8e80941Smrg      /* This sequence is special, do not change ordering unless necessary. According to comment
596b8e80941Smrg         snippets in the Vivante kernel driver a process called "steering" goes on while programming
597b8e80941Smrg         shader state. This (as I understand it) means certain unified states are "steered"
598b8e80941Smrg         toward a specific shader unit (VS/PS/...) based on either explicit flags in register
599b8e80941Smrg         00860, or what other state is written before "auto-steering". So this means some
600b8e80941Smrg         state can legitimately be programmed multiple times.
601b8e80941Smrg       */
602b8e80941Smrg
603b8e80941Smrg      if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */
604b8e80941Smrg         assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
605b8e80941Smrg         /* Set icache (VS) */
606b8e80941Smrg         etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
607b8e80941Smrg         etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
608b8e80941Smrg         assert(ctx->shader_state.VS_INST_ADDR.bo);
609b8e80941Smrg         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
610b8e80941Smrg         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
611b8e80941Smrg         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
612b8e80941Smrg         etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
613b8e80941Smrg
614b8e80941Smrg         /* Set icache (PS) */
615b8e80941Smrg         etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
616b8e80941Smrg         etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
617b8e80941Smrg         assert(ctx->shader_state.PS_INST_ADDR.bo);
618b8e80941Smrg         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
619b8e80941Smrg         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
620b8e80941Smrg         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
621b8e80941Smrg         etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
622b8e80941Smrg
623b8e80941Smrg      } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
624b8e80941Smrg         /* ICACHE (pre-HALTI5) */
625b8e80941Smrg         assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
626b8e80941Smrg         /* Set icache (VS) */
627b8e80941Smrg         etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
628b8e80941Smrg         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
629b8e80941Smrg               VIVS_VS_ICACHE_CONTROL_ENABLE |
630b8e80941Smrg               VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
631b8e80941Smrg         assert(ctx->shader_state.VS_INST_ADDR.bo);
632b8e80941Smrg         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
633b8e80941Smrg
634b8e80941Smrg         /* Set icache (PS) */
635b8e80941Smrg         etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
636b8e80941Smrg         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
637b8e80941Smrg               VIVS_VS_ICACHE_CONTROL_ENABLE |
638b8e80941Smrg               VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
639b8e80941Smrg         assert(ctx->shader_state.PS_INST_ADDR.bo);
640b8e80941Smrg         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
641b8e80941Smrg      } else {
642b8e80941Smrg         /* Upload shader directly, first flushing and disabling icache if
643b8e80941Smrg          * supported on this hw */
644b8e80941Smrg         if (ctx->specs.has_icache) {
645b8e80941Smrg            etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
646b8e80941Smrg                  VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
647b8e80941Smrg                  VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
648b8e80941Smrg         }
649b8e80941Smrg         if (ctx->specs.has_shader_range_registers) {
650b8e80941Smrg            etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
651b8e80941Smrg            etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
652b8e80941Smrg                                        0x100);
653b8e80941Smrg         }
654b8e80941Smrg         etna_set_state_multi(stream, ctx->specs.vs_offset,
655b8e80941Smrg                              ctx->shader_state.vs_inst_mem_size,
656b8e80941Smrg                              ctx->shader_state.VS_INST_MEM);
657b8e80941Smrg         etna_set_state_multi(stream, ctx->specs.ps_offset,
658b8e80941Smrg                              ctx->shader_state.ps_inst_mem_size,
659b8e80941Smrg                              ctx->shader_state.PS_INST_MEM);
660b8e80941Smrg      }
661b8e80941Smrg
662b8e80941Smrg      if (ctx->specs.has_unified_uniforms) {
663b8e80941Smrg         etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
664b8e80941Smrg         etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
665b8e80941Smrg      }
666b8e80941Smrg
667b8e80941Smrg      if (do_uniform_flush)
668b8e80941Smrg         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
669b8e80941Smrg      etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
670b8e80941Smrg                                     ctx->shader_state.vs_uniforms_size,
671b8e80941Smrg                                     ctx->shader_state.VS_UNIFORMS);
672b8e80941Smrg      if (do_uniform_flush)
673b8e80941Smrg         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
674b8e80941Smrg      etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
675b8e80941Smrg                                     ctx->shader_state.ps_uniforms_size,
676b8e80941Smrg                                     ctx->shader_state.PS_UNIFORMS);
677b8e80941Smrg
678b8e80941Smrg      /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
679b8e80941Smrg       * possible as long as the
680b8e80941Smrg       * same shader remains bound */
681b8e80941Smrg      memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS,
682b8e80941Smrg             ctx->shader_state.vs_uniforms_size * 4);
683b8e80941Smrg      memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
684b8e80941Smrg             ctx->shader_state.ps_uniforms_size * 4);
685b8e80941Smrg
686b8e80941Smrg      if (ctx->specs.halti >= 5) {
687b8e80941Smrg         /* HALTI5 needs to be prompted to pre-fetch shaders */
688b8e80941Smrg         etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
689b8e80941Smrg         etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
690b8e80941Smrg         etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
691b8e80941Smrg      }
692b8e80941Smrg   } else {
693b8e80941Smrg      /* ideally this cache would only be flushed if there are VS uniform changes */
694b8e80941Smrg      if (do_uniform_flush)
695b8e80941Smrg         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
696b8e80941Smrg      etna_coalesce_start(stream, &coalesce);
697b8e80941Smrg      for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
698b8e80941Smrg         if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
699b8e80941Smrg            etna_coalsence_emit(stream, &coalesce, ctx->specs.vs_uniforms_offset + x*4, ctx->shader_state.VS_UNIFORMS[x]);
700b8e80941Smrg            ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x];
701b8e80941Smrg         }
702b8e80941Smrg      }
703b8e80941Smrg      etna_coalesce_end(stream, &coalesce);
704b8e80941Smrg
705b8e80941Smrg      /* ideally this cache would only be flushed if there are PS uniform changes */
706b8e80941Smrg      if (do_uniform_flush)
707b8e80941Smrg         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
708b8e80941Smrg      etna_coalesce_start(stream, &coalesce);
709b8e80941Smrg      for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
710b8e80941Smrg         if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
711b8e80941Smrg            etna_coalsence_emit(stream, &coalesce, ctx->specs.ps_uniforms_offset + x*4, ctx->shader_state.PS_UNIFORMS[x]);
712b8e80941Smrg            ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x];
713b8e80941Smrg         }
714b8e80941Smrg      }
715b8e80941Smrg      etna_coalesce_end(stream, &coalesce);
716b8e80941Smrg   }
717b8e80941Smrg/**** End of state update ****/
718b8e80941Smrg#undef EMIT_STATE
719b8e80941Smrg#undef EMIT_STATE_FIXP
720b8e80941Smrg#undef EMIT_STATE_RELOC
721b8e80941Smrg   ctx->dirty = 0;
722b8e80941Smrg   ctx->dirty_sampler_views = 0;
723b8e80941Smrg}
724