1/* 2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#ifndef FD5_EMIT_H 28#define FD5_EMIT_H 29 30#include "pipe/p_context.h" 31 32#include "fd5_context.h" 33#include "fd5_format.h" 34#include "fd5_program.h" 35#include "fd5_screen.h" 36#include "freedreno_batch.h" 37#include "freedreno_context.h" 38#include "ir3_gallium.h" 39 40struct fd_ringbuffer; 41 42/* grouped together emit-state for prog/vertex/state emit: */ 43struct fd5_emit { 44 struct pipe_debug_callback *debug; 45 const struct fd_vertex_state *vtx; 46 const struct fd5_program_state *prog; 47 const struct pipe_draw_info *info; 48 unsigned drawid_offset; 49 const struct pipe_draw_indirect_info *indirect; 50 const struct pipe_draw_start_count_bias *draw; 51 bool binning_pass; 52 struct ir3_cache_key key; 53 enum fd_dirty_3d_state dirty; 54 55 uint32_t sprite_coord_enable; /* bitmask */ 56 bool sprite_coord_mode; 57 bool rasterflat; 58 59 /* in binning pass, we don't have real frag shader, so we 60 * don't know if real draw disqualifies lrz write. So just 61 * figure that out up-front and stash it in the emit. 62 */ 63 bool no_lrz_write; 64 65 /* cached to avoid repeated lookups of same variants: */ 66 const struct ir3_shader_variant *vs, *fs; 67 /* TODO: other shader stages.. */ 68 69 unsigned streamout_mask; 70}; 71 72static inline enum a5xx_color_fmt 73fd5_emit_format(struct pipe_surface *surf) 74{ 75 if (!surf) 76 return 0; 77 return fd5_pipe2color(surf->format); 78} 79 80static inline const struct ir3_shader_variant * 81fd5_emit_get_vp(struct fd5_emit *emit) 82{ 83 if (!emit->vs) { 84 /* We use nonbinning VS during binning when TFB is enabled because that 85 * is what has all the outputs that might be involved in TFB. 86 */ 87 if (emit->binning_pass && 88 !emit->prog->vs->shader->stream_output.num_outputs) 89 emit->vs = emit->prog->bs; 90 else 91 emit->vs = emit->prog->vs; 92 } 93 return emit->vs; 94} 95 96static inline const struct ir3_shader_variant * 97fd5_emit_get_fp(struct fd5_emit *emit) 98{ 99 if (!emit->fs) { 100 if (emit->binning_pass) { 101 /* use dummy stateobj to simplify binning vs non-binning: */ 102 static const struct ir3_shader_variant binning_fs = {}; 103 emit->fs = &binning_fs; 104 } else { 105 emit->fs = emit->prog->fs; 106 } 107 } 108 return emit->fs; 109} 110 111static inline void 112fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt 113{ 114 fd_reset_wfi(batch); 115 OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5); 116 OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */ 117 OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */ 118 OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */ 119 OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */ 120 OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */ 121 fd_wfi(batch, ring); 122} 123 124static inline void 125fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring, 126 enum render_mode_cmd mode) 127{ 128 /* TODO add preemption support, gmem bypass, etc */ 129 emit_marker5(ring, 7); 130 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); 131 OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode)); 132 OUT_RING(ring, 0x00000000); /* ADDR_LO */ 133 OUT_RING(ring, 0x00000000); /* ADDR_HI */ 134 OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) | 135 COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE)); 136 OUT_RING(ring, 0x00000000); 137 emit_marker5(ring, 7); 138} 139 140static inline void 141fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring, 142 enum vgt_event_type evt, bool timestamp) 143{ 144 OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1); 145 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt)); 146 if (timestamp) { 147 OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0, 148 0); /* ADDR_LO/HI */ 149 OUT_RING(ring, 0x00000000); 150 } 151} 152 153static inline void 154fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring) 155{ 156 emit_marker5(ring, 7); 157 fd5_event_write(batch, ring, BLIT, true); 158 emit_marker5(ring, 7); 159} 160 161static inline void 162fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt 163{ 164 struct fd_ringbuffer *ring = 165 binning ? ctx->batch->binning : ctx->batch->draw; 166 167 /* TODO eventually this partially depends on the pfb state, ie. 168 * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part 169 * we could probably cache and just regenerate if framebuffer 170 * state is dirty (or something like that).. 171 * 172 * Other bits seem to depend on query state, like if samples-passed 173 * query is active. 174 */ 175 bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0); 176 OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); 177 OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */ 178 COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) | 179 COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) | 180 COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) | 181 COND(!blit, 0x8)); 182 183 OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1); 184 OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */ 185 COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) | 186 COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED)); 187} 188 189static inline void 190fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) 191{ 192 /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably 193 * a workaround and not needed on all a5xx. 194 */ 195 OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); 196 OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE); 197 198 fd5_event_write(batch, ring, LRZ_FLUSH, false); 199 200 OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); 201 OUT_RING(ring, 0x0); 202} 203 204void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, 205 struct fd5_emit *emit) assert_dt; 206 207void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, 208 struct fd5_emit *emit) assert_dt; 209 210void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, 211 struct ir3_shader_variant *cp) assert_dt; 212void fd5_emit_cs_consts(const struct ir3_shader_variant *v, 213 struct fd_ringbuffer *ring, struct fd_context *ctx, 214 const struct pipe_grid_info *info) assert_dt; 215 216void fd5_emit_restore(struct fd_batch *batch, 217 struct fd_ringbuffer *ring) assert_dt; 218 219void fd5_emit_init_screen(struct pipe_screen *pscreen); 220void fd5_emit_init(struct pipe_context *pctx); 221 222static inline void 223fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) 224{ 225 /* for debug after a lock up, write a unique counter value 226 * to scratch6 for each IB, to make it easier to match up 227 * register dumps to cmdstream. The combination of IB and 228 * DRAW (scratch7) is enough to "triangulate" the particular 229 * draw that caused lockup. 230 */ 231 emit_marker5(ring, 6); 232 __OUT_IB5(ring, target); 233 emit_marker5(ring, 6); 234} 235 236#endif /* FD5_EMIT_H */ 237