/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_

#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/perf/u_trace.h"

#include "freedreno_autotune.h"
#include "freedreno_gmem.h"
#include "freedreno_perfetto.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)

struct fd_vertex_stateobj;
struct fd_batch;

struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;
   unsigned valid_textures;
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;
   unsigned valid_samplers;
};

struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};

struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;
   uint32_t writable_mask;
};

struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask;
};
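
/* Note: these *_stateobj wrappers pair a fixed-size slot array with an
 * enabled_mask bitfield, where bit i is set when slot i has something bound.
 * A minimal sketch of how emit code typically walks such state (illustrative
 * only, not an API defined by this header):
 *
 *    u_foreach_bit (i, ctx->shaderbuf[PIPE_SHADER_FRAGMENT].enabled_mask) {
 *       const struct pipe_shader_buffer *buf =
 *          &ctx->shaderbuf[PIPE_SHADER_FRAGMENT].sb[i];
 *       // ... emit buf->buffer / buf->buffer_offset / buf->buffer_size ...
 *    }
 */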

struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;
   uint32_t enabled_mask;
};

struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};

struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};

struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data.  This counter is
    * just incremented by # of vertices on each draw until reset or
    * a new streamout buffer is bound.
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so I think we'll have to do
    * something more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to this
    * set of targets with the current vertex shader.  a6xx and newer, hardware
    * queries are used.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin.  Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};
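
/* Illustrative note on fd_streamout_stateobj::offsets[] (a rough sketch, not
 * the actual bookkeeping code): after each draw the per-target offset is
 * simply advanced by the number of vertices emitted, roughly
 *
 *    for (unsigned i = 0; i < ctx->streamout.num_targets; i++)
 *       ctx->streamout.offsets[i] += draw->count;
 *
 * until the counter is reset or a new streamout buffer is bound, matching the
 * comment on the field above.
 */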

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;
   struct fd_vertexbuf_stateobj vertexbuf;
};

/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(20),

   /* fine grained state changes, for cases where state is not orthogonal
    * from hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
   FD_DIRTY_BLEND_DUAL = BIT(26),
#define NUM_DIRTY_BITS 27

   /* additional flag for state that requires updated resource tracking: */
   FD_DIRTY_RESOURCE = BIT(31),
};

/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};

#define MAX_HW_SAMPLE_PROVIDERS 7
struct fd_hw_sample_provider;
struct fd_hw_sample;

struct ir3_shader_key;
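
/* Illustrative sketch (not driver API): per-generation emit code consumes the
 * dirty bits above roughly like this, re-emitting only the register groups
 * whose gallium-level state changed since the last draw:
 *
 *    enum fd_dirty_3d_state dirty = ctx->dirty;
 *    if (dirty & FD_DIRTY_BLEND)
 *       ;  // re-emit blend related registers
 *    if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX)
 *       ;  // re-emit fragment texture/sampler state
 *
 * The real per-gen paths additionally use the gen_dirty mapping in
 * struct fd_context below to coalesce these into hardware state groups.
 */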

struct fd_context {
   struct pipe_context base;

   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded.  But it is needed to protect the
    * case, with batch reordering, where a ctxB batch triggers flushing
    * a ctxA batch.
    */
   simple_mtx_t gmem_lock;

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   uint8_t patch_vertices;

   /* Whether we need to recheck the active_queries list next
    * fd_batch_update_queries().
    */
   bool update_active_queries dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints.  But not in code-
    * paths from fd_batch_flush() (basically, the stuff that gets
    * called from GMEM code), since in those code-paths the batch
    * you care about is not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* NULL if there has been rendering since last flush.  Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush a no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none).  The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw.  But mesa/st doesn't flush down framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch.  If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it
    * prematurely, causing us to stall early in the frame where we could
    * be building up cmdstream.
    */
   int in_fence_fd dt;
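
   /* Illustrative sketch of the hand-off described above (not the actual
    * driver code; batch->in_fence_fd is an assumed field name for the sake of
    * the example): at the next draw/blit/grid something along these lines
    * happens:
    *
    *    if (ctx->in_fence_fd != -1) {
    *       // merge into the batch's pending in-fence; sync_accumulate()
    *       // comes from util/libsync.h included above
    *       sync_accumulate("fd", &batch->in_fence_fd, ctx->in_fence_fd);
    *       close(ctx->in_fence_fd);
    *       ctx->in_fence_fd = -1;
    *    }
    */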

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then.  If global reset
    * count increases, it means some other context crashed.  If
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in process of shadowing a resource? Used to detect recursion
    * in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   struct pipe_scissor_state scissor dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   enum fd_dirty_3d_state dirty dt;

   /* per shader-stage dirty status: */
   enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES] dt;

   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;
   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport dt;
   struct pipe_scissor_state viewport_scissor dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece of
    * memory, in addition to registers.  It is backed by a buffer which needs
    * to be large enough to hold the contents of every possible wavefront in
    * every core of the GPU.  Because it allocates space via the internal
    * wavefront ID which is shared between all currently executing shaders,
    * the same buffer can be reused by all shaders, as long as all shaders
    * sharing the same buffer use the exact same configuration.  There are two
    * inputs to the configuration, the amount of per-fiber space and whether
    * to use the newer per-wave or older per-fiber layout.  We only ever
    * increase the size, and shaders with a smaller size requirement simply
    * use the larger existing buffer, so that we only need to keep track of
    * one buffer and its size, but we still need to keep track of per-fiber
    * and per-wave buffers separately so that we never use the same buffer
    * for different layouts.  pvtmem[0] is for per-fiber, and pvtmem[1] is for
    * per-wave.
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
   } pvtmem[2] dt;

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct pipe_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns: */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
                    unsigned drawid_offset,
                    const struct pipe_draw_indirect_info *indirect,
                    const struct pipe_draw_start_count_bias *draw,
                    unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, unsigned buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;
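
   /* Illustrative note: the function pointers above (and the blit/barrier/
    * logger hooks that follow) form the per-generation backend interface.
    * Each generation's context-create path is expected to install its own
    * implementations before the context is used, conceptually:
    *
    *    ctx->emit_tile_init = fdN_emit_tile_init;   // hypothetical fdN_* names
    *    ctx->draw_vbo       = fdN_draw_vbo;
    *    ctx->launch_grid    = fdN_launch_grid;
    *
    * The fdN_* identifiers are placeholders for whatever the a2xx..a6xx code
    * actually provides; this is a sketch, not the real wiring.
    */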

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* handling for barriers: */
   void (*framebuffer_barrier)(struct fd_context *ctx) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about state of previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO).  This allows us to
    * skip some register emit when the state doesn't change from
    * draw-to-draw.
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant.  Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change.  See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};

static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   return (struct fd_context *)pctx;
}

static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   return (struct fd_stream_output_target *)target;
}
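
/* Illustrative sketch of how the `last` block in fd_context is meant to be
 * used by per-gen draw code (hypothetical snippet, not the actual emit path):
 *
 *    if (ctx->last.dirty || ctx->last.instance_start != info->start_instance) {
 *       // re-emit the registers derived from pipe_draw_info
 *       ctx->last.instance_start = info->start_instance;
 *    }
 *
 * On any full state invalidation, last.dirty is set (see
 * fd_context_all_dirty() below) so the next draw re-emits everything.
 */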

/**
 * Does the dirty state require resource tracking, ie. in general
 * does it reference some resource.  There are some special cases:
 *
 * - FD_DIRTY_CONST can reference a resource, but cb0 is handled
 *   specially: if it is not a user-buffer, we expect it to be
 *   coming from const_uploader, so we can make some assumptions
 *   that future transfer_map will be UNSYNCHRONIZED
 * - FD_DIRTY_ZSA controls how the framebuffer is accessed
 * - FD_DIRTY_BLEND needs to update the GMEM reason flags
 *
 * TODO if we can make assumptions that framebuffer state is bound
 * first, before blend/zsa/etc state, we can move some of the ZSA/
 * BLEND state handling from draw time to bind time.  I think this
 * is true of mesa/st, perhaps we can just document it to be a
 * frontend requirement?
 */
static inline bool
fd_context_dirty_resource(enum fd_dirty_3d_state dirty)
{
   return dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA | FD_DIRTY_BLEND |
                   FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_VTXBUF |
                   FD_DIRTY_TEX | FD_DIRTY_STREAMOUT);
}

#ifdef __cplusplus
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      decltype(mask) _d = (d);                                                 \
      d = (decltype(mask))(_d | (mask));                                       \
   } while (0)
#else
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      d |= (mask);                                                             \
   } while (0)
#endif

/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, enum fd_dirty_3d_state dirty) assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   STATIC_ASSERT(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));

   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];

   if (fd_context_dirty_resource(dirty))
      or_dirty(dirty, FD_DIRTY_RESOURCE);

   or_dirty(ctx->dirty, dirty);
}

static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
                        enum fd_dirty_shader_state dirty) assert_dt
{
   const enum fd_dirty_3d_state map[] = {
      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
   };

   /* Need to update the table above if these shift: */
   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));

   assert(util_is_power_of_two_nonzero(dirty));
   assert(ffs(dirty) <= ARRAY_SIZE(map));

   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];

   or_dirty(ctx->dirty_shader[shader], dirty);
   fd_context_dirty(ctx, map[ffs(dirty) - 1]);
}
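
/* Minimal usage sketch (illustrative): gallium state-setter entry points mark
 * state dirty with these helpers rather than touching ctx->dirty directly, so
 * that the gen_dirty mapping and the FD_DIRTY_RESOURCE promotion stay
 * consistent.  A simplified re-statement of what a setter like
 * fd_set_blend_color() in freedreno_state.c does:
 *
 *    static void
 *    fd_set_blend_color(struct pipe_context *pctx,
 *                       const struct pipe_blend_color *blend_color)
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *       ctx->blend_color = *blend_color;
 *       fd_context_dirty(ctx, FD_DIRTY_BLEND_COLOR);
 *    }
 */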

/* mark all state dirty: */
static inline void
fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = true;
   ctx->dirty = (enum fd_dirty_3d_state) ~0;

   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
    * emit code will loop over all the bits:
    */
   ctx->gen_dirty = ctx->gen_all_dirty;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
}

static inline void
fd_context_all_clean(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = false;
   ctx->dirty = (enum fd_dirty_3d_state)0;
   ctx->gen_dirty = 0;
   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      /* don't mark compute state as clean, since it is not emitted
       * during a normal draw call.  In the places that call _all_dirty(),
       * it is safe to mark compute state dirty as well, but the
       * inverse is not true.
       */
      if (i == PIPE_SHADER_COMPUTE)
         continue;
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
   }
}

/**
 * Add mapping between global dirty bit and generation specific dirty
 * bit.
 */
static inline void
fd_context_add_map(struct fd_context *ctx, enum fd_dirty_3d_state dirty,
                   uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_map[b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}

/**
 * Add mapping between shader stage specific dirty bit and generation
 * specific dirty bit
 */
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
                          enum fd_dirty_shader_state dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}

static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
   return ctx->current_scissor;
}

void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;

void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);
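
/* Usage sketch for the add_map helpers above (illustrative; the MY_GEN_GROUP_*
 * bits below are hypothetical, each generation defines its own state groups):
 *
 *    fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK,
 *                       MY_GEN_GROUP_BLEND);
 *    fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT,
 *                              FD_DIRTY_SHADER_TEX, MY_GEN_GROUP_FS_TEX);
 *
 * After this, fd_context_dirty(ctx, FD_DIRTY_BLEND) also sets
 * MY_GEN_GROUP_BLEND in ctx->gen_dirty, which the gen-specific emit code
 * consumes.
 */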

struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_CONTEXT_H_ */