1/*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#ifndef FD5_EMIT_H
28#define FD5_EMIT_H
29
30#include "pipe/p_context.h"
31
32#include "fd5_context.h"
33#include "fd5_format.h"
34#include "fd5_program.h"
35#include "fd5_screen.h"
36#include "freedreno_batch.h"
37#include "freedreno_context.h"
38#include "ir3_gallium.h"
39
40struct fd_ringbuffer;
41
42/* grouped together emit-state for prog/vertex/state emit: */
43struct fd5_emit {
44   struct pipe_debug_callback *debug;
45   const struct fd_vertex_state *vtx;
46   const struct fd5_program_state *prog;
47   const struct pipe_draw_info *info;
48        unsigned drawid_offset;
49   const struct pipe_draw_indirect_info *indirect;
50	const struct pipe_draw_start_count_bias *draw;
51   bool binning_pass;
52   struct ir3_cache_key key;
53   enum fd_dirty_3d_state dirty;
54
55   uint32_t sprite_coord_enable; /* bitmask */
56   bool sprite_coord_mode;
57   bool rasterflat;
58
59   /* in binning pass, we don't have real frag shader, so we
60    * don't know if real draw disqualifies lrz write.  So just
61    * figure that out up-front and stash it in the emit.
62    */
63   bool no_lrz_write;
64
65   /* cached to avoid repeated lookups of same variants: */
66   const struct ir3_shader_variant *vs, *fs;
67   /* TODO: other shader stages.. */
68
69   unsigned streamout_mask;
70};
71
72static inline enum a5xx_color_fmt
73fd5_emit_format(struct pipe_surface *surf)
74{
75   if (!surf)
76      return 0;
77   return fd5_pipe2color(surf->format);
78}
79
80static inline const struct ir3_shader_variant *
81fd5_emit_get_vp(struct fd5_emit *emit)
82{
83   if (!emit->vs) {
84      /* We use nonbinning VS during binning when TFB is enabled because that
85       * is what has all the outputs that might be involved in TFB.
86       */
87      if (emit->binning_pass &&
88          !emit->prog->vs->shader->stream_output.num_outputs)
89         emit->vs = emit->prog->bs;
90      else
91         emit->vs = emit->prog->vs;
92   }
93   return emit->vs;
94}
95
96static inline const struct ir3_shader_variant *
97fd5_emit_get_fp(struct fd5_emit *emit)
98{
99   if (!emit->fs) {
100      if (emit->binning_pass) {
101         /* use dummy stateobj to simplify binning vs non-binning: */
102         static const struct ir3_shader_variant binning_fs = {};
103         emit->fs = &binning_fs;
104      } else {
105         emit->fs = emit->prog->fs;
106      }
107   }
108   return emit->fs;
109}
110
111static inline void
112fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
113{
114   fd_reset_wfi(batch);
115   OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
116   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
117   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
118   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
119   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
120   OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
121   fd_wfi(batch, ring);
122}
123
124static inline void
125fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
126                    enum render_mode_cmd mode)
127{
128   /* TODO add preemption support, gmem bypass, etc */
129   emit_marker5(ring, 7);
130   OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
131   OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
132   OUT_RING(ring, 0x00000000); /* ADDR_LO */
133   OUT_RING(ring, 0x00000000); /* ADDR_HI */
134   OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
135                     COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
136   OUT_RING(ring, 0x00000000);
137   emit_marker5(ring, 7);
138}
139
140static inline void
141fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
142                enum vgt_event_type evt, bool timestamp)
143{
144   OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
145   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
146   if (timestamp) {
147      OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0,
148                0); /* ADDR_LO/HI */
149      OUT_RING(ring, 0x00000000);
150   }
151}
152
static inline void
fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   /* Emit a BLIT event with timestamp write-back (to blit_mem),
    * bracketed by markers for post-hang cmdstream matching.
    */
   emit_marker5(ring, 7);
   fd5_event_write(batch, ring, BLIT, true);
   emit_marker5(ring, 7);
}
160
161static inline void
162fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
163{
164   struct fd_ringbuffer *ring =
165      binning ? ctx->batch->binning : ctx->batch->draw;
166
167   /* TODO eventually this partially depends on the pfb state, ie.
168    * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
169    * we could probably cache and just regenerate if framebuffer
170    * state is dirty (or something like that)..
171    *
172    * Other bits seem to depend on query state, like if samples-passed
173    * query is active.
174    */
175   bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
176   OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
177   OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
178                     COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
179                     COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
180                     COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
181                     COND(!blit, 0x8));
182
183   OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
184   OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
185                     COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
186                     COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
187}
188
static inline void
fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   /* Emit an LRZ_FLUSH event, toggling GRAS_LRZ_CNTL enable on
    * around the event and back off afterwards.
    *
    * TODO I think the extra writes to GRAS_LRZ_CNTL are probably
    * a workaround and not needed on all a5xx.
    */
   OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);

   fd5_event_write(batch, ring, LRZ_FLUSH, false);

   OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, 0x0);
}
203
204void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring,
205                          struct fd5_emit *emit) assert_dt;
206
207void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
208                    struct fd5_emit *emit) assert_dt;
209
210void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
211                       struct ir3_shader_variant *cp) assert_dt;
212void fd5_emit_cs_consts(const struct ir3_shader_variant *v,
213                        struct fd_ringbuffer *ring, struct fd_context *ctx,
214                        const struct pipe_grid_info *info) assert_dt;
215
216void fd5_emit_restore(struct fd_batch *batch,
217                      struct fd_ringbuffer *ring) assert_dt;
218
219void fd5_emit_init_screen(struct pipe_screen *pscreen);
220void fd5_emit_init(struct pipe_context *pctx);
221
/* Emit an indirect-branch to a child ringbuffer, bracketed by markers. */
static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   /* for debug after a lock up, write a unique counter value
    * to scratch6 for each IB, to make it easier to match up
    * register dumps to cmdstream.  The combination of IB and
    * DRAW (scratch7) is enough to "triangulate" the particular
    * draw that caused lockup.
    */
   emit_marker5(ring, 6);
   __OUT_IB5(ring, target);
   emit_marker5(ring, 6);
}
235
236#endif /* FD5_EMIT_H */
237