/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_

#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/perf/u_trace.h"

#include "freedreno_autotune.h"
#include "freedreno_gmem.h"
#include "freedreno_perfetto.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)

struct fd_vertex_stateobj;
struct fd_batch;

struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;
   unsigned valid_textures;
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;
   unsigned valid_samplers;
};

struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};

struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;
   uint32_t writable_mask;
};

struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask;
};

struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;
   uint32_t enabled_mask;
};

struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};

struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};

struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data.  This counter
    * is just incremented by # of vertices on each draw until
    * reset or new streamout buffer bound.
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so I think we'll have to do
    * something more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to this
    * set of targets with the current vertex shader.  a6xx and newer, hardware
    * queries are used.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin.  Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;
   struct fd_vertexbuf_stateobj vertexbuf;
};

/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(20),

   /* fine grained state changes, for cases where state is not orthogonal
    * from hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
   FD_DIRTY_BLEND_DUAL = BIT(26),
#define NUM_DIRTY_BITS 27

   /* additional flag for state that requires updated resource tracking: */
   FD_DIRTY_RESOURCE = BIT(31),
};

/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};

#define MAX_HW_SAMPLE_PROVIDERS 7
struct fd_hw_sample_provider;
struct fd_hw_sample;

struct ir3_shader_key;

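/* Note: the 'dt' tag on fields below is a thread-safety annotation marking
 * state that is expected to be accessed only from the driver thread (ie.
 * from pipe_context entrypoints), not from the threaded_context front-end
 * thread.
 */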
struct fd_context {
   struct pipe_context base;

   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded.  But it is needed to protect the
    * case where, with batch reordering, a ctxB batch triggers flushing
    * a ctxA batch.
    */
   simple_mtx_t gmem_lock;

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   uint8_t patch_vertices;

   /* Whether we need to recheck the active_queries list next
    * fd_batch_update_queries().
    */
   bool update_active_queries dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints.  But not in code-
    * paths from fd_batch_flush() (basically, the stuff that gets
    * called from GMEM code), since in those code-paths the batch
    * you care about is not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* NULL if there has been rendering since last flush.  Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush a no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none).  The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw.  But mesa/st doesn't flush down framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch.  If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it
    * prematurely, causing us to stall early in the frame where we could
    * be building up cmdstream.
    */
   int in_fence_fd dt;

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then.  If global reset
    * count increases, it means some other context crashed.  If
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in process of shadowing a resource? Used to detect recursion
    * in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   struct pipe_scissor_state scissor dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   enum fd_dirty_3d_state dirty dt;

   /* per shader-stage dirty status: */
   enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES] dt;

   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;
   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport dt;
   struct pipe_scissor_state viewport_scissor dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece of
    * memory, in addition to registers. It is backed by a buffer which needs
    * to be large enough to hold the contents of every possible wavefront in
    * every core of the GPU. Because it allocates space via the internal
    * wavefront ID which is shared between all currently executing shaders,
    * the same buffer can be reused by all shaders, as long as all shaders
    * sharing the same buffer use the exact same configuration. There are two
    * inputs to the configuration, the amount of per-fiber space and whether
    * to use the newer per-wave or older per-fiber layout. We only ever
    * increase the size, and shaders with a smaller size requirement simply
    * use the larger existing buffer, so that we only need to keep track of
    * one buffer and its size, but we still need to keep track of per-fiber
    * and per-wave buffers separately so that we never use the same buffer
    * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
    * per-wave.
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
   } pvtmem[2] dt;

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct pipe_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns: */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
                    unsigned drawid_offset,
                    const struct pipe_draw_indirect_info *indirect,
                    const struct pipe_draw_start_count_bias *draw,
                    unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, unsigned buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* handling for barriers: */
   void (*framebuffer_barrier)(struct fd_context *ctx) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about state of previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO).  This allows us to
    * skip some register emit when the state doesn't change from
    * draw-to-draw
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant.  Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change.  See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};

static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   return (struct fd_context *)pctx;
}

static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   return (struct fd_stream_output_target *)target;
}

/**
 * Does the dirty state require resource tracking, ie. in general
 * does it reference some resource.  There are some special cases:
 *
 * - FD_DIRTY_CONST can reference a resource, but cb0 is handled
 *   specially as if it is not a user-buffer, we expect it to be
 *   coming from const_uploader, so we can make some assumptions
 *   that future transfer_map will be UNSYNCHRONIZED
 * - FD_DIRTY_ZSA controls how the framebuffer is accessed
 * - FD_DIRTY_BLEND needs to update GMEM reason
 *
 * TODO if we can make assumptions that framebuffer state is bound
 * first, before blend/zsa/etc state we can move some of the ZSA/
 * BLEND state handling from draw time to bind time.  I think this
 * is true of mesa/st, perhaps we can just document it to be a
 * frontend requirement?
 */
static inline bool
fd_context_dirty_resource(enum fd_dirty_3d_state dirty)
{
   return dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA | FD_DIRTY_BLEND |
                   FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_VTXBUF |
                   FD_DIRTY_TEX | FD_DIRTY_STREAMOUT);
}

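/* Helper to OR a mask into a dirty-bit enum.  The C++ variant needs the
 * explicit cast, since C++ does not allow |= on enum types.
 */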
#ifdef __cplusplus
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      decltype(mask) _d = (d);                                                 \
      d = (decltype(mask))(_d | (mask));                                       \
   } while (0)
#else
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      d |= (mask);                                                             \
   } while (0)
#endif

/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, enum fd_dirty_3d_state dirty) assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   STATIC_ASSERT(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));

   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];

   if (fd_context_dirty_resource(dirty))
      or_dirty(dirty, FD_DIRTY_RESOURCE);

   or_dirty(ctx->dirty, dirty);
}
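
/*
 * Illustrative usage (a simplified sketch, not the actual implementation):
 * the CSO bind/set entrypoints typically just stash the new state and flag
 * the corresponding dirty bit, roughly like:
 *
 *    static void
 *    fd_blend_state_bind(struct pipe_context *pctx, void *hwcso) in_dt
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *       ctx->blend = hwcso;
 *       fd_context_dirty(ctx, FD_DIRTY_BLEND);
 *    }
 *
 * The real bind callbacks do additional bookkeeping (eg. fine-grained bits
 * like FD_DIRTY_BLEND_DUAL), so treat this only as a sketch of the dirty
 * tracking flow.
 */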

static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
                        enum fd_dirty_shader_state dirty) assert_dt
{
   const enum fd_dirty_3d_state map[] = {
      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
   };

   /* Need to update the table above if these shift: */
   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));

   assert(util_is_power_of_two_nonzero(dirty));
   assert(ffs(dirty) <= ARRAY_SIZE(map));

   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];

   or_dirty(ctx->dirty_shader[shader], dirty);
   fd_context_dirty(ctx, map[ffs(dirty) - 1]);
}

/* mark all state dirty: */
static inline void
fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = true;
   ctx->dirty = (enum fd_dirty_3d_state) ~0;

   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
    * emit code will loop over all the bits:
    */
   ctx->gen_dirty = ctx->gen_all_dirty;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
}

static inline void
fd_context_all_clean(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = false;
   ctx->dirty = (enum fd_dirty_3d_state)0;
   ctx->gen_dirty = 0;
   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      /* don't mark compute state as clean, since it is not emitted
       * during a normal draw call.  In the places that call _all_dirty(),
       * it is safe to mark compute state dirty as well, but the
       * inverse is not true.
       */
      if (i == PIPE_SHADER_COMPUTE)
         continue;
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
   }
}

/**
 * Add mapping between global dirty bit and generation specific dirty
 * bit.
 */
static inline void
fd_context_add_map(struct fd_context *ctx, enum fd_dirty_3d_state dirty,
                   uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_map[b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}

/**
 * Add mapping between shader stage specific dirty bit and generation
 * specific dirty bit
 */
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
                          enum fd_dirty_shader_state dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}
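
/*
 * Illustrative usage (sketch): a generation specific backend registers these
 * mappings at context-init time so that generic dirty bits are routed to its
 * own state-group bits.  The group names below are placeholders for whatever
 * per-generation grouping the backend defines, not real identifiers:
 *
 *    fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK,
 *                       BIT(SOME_GEN_GROUP_BLEND));
 *    fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX,
 *                              BIT(SOME_GEN_GROUP_FS_TEX));
 */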

static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
   return ctx->current_scissor;
}

void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;

void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);

struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_CONTEXT_H_ */