freedreno_batch.h revision 7ec681f3
/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/perf/u_trace.h"

#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;

/* A batch tracks everything about a cmdstream batch/submit, including the
 * ringbuffers used for binning, draw, and gmem cmds, list of associated
 * fd_resource-s, etc.
 */
struct fd_batch {
   struct pipe_reference reference;
   unsigned seqno;
   unsigned idx; /* index into cache->batches[] */

   struct u_trace trace;

   /* To detect cases where we can skip cmdstream to record timestamp: */
   uint32_t *last_timestamp_cmd;

   int in_fence_fd;
   struct pipe_fence_handle *fence;

   struct fd_context *ctx;

   /* The submit_lock serializes cmdstream emission and flush. Acquire it
    * before screen->lock.
    */
   simple_mtx_t submit_lock;

   /* do we need to mem2gmem before rendering. We don't, if for example,
    * there was a glClear() that invalidated the entire previous buffer
    * contents. Keep track of which buffer(s) are cleared, or need
    * restore. Masks of PIPE_CLEAR_*
    *
    * The 'cleared' bits will be set for buffers which are *entirely*
    * cleared, and 'partial_cleared' bits will be set if you must
    * check cleared_scissor.
    *
    * The 'invalidated' bits are set for cleared buffers, and buffers
    * where the contents are undefined, ie. what we don't need to restore
    * to gmem.
    */
   enum {
      /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
      FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
      FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
      FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
      FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
   } invalidated, cleared, fast_cleared, restore, resolve;
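
   /* For example (an illustrative sketch, not verbatim driver code): a
    * full-surface clear of the color buffer means the previous contents
    * do not need to be restored to gmem:
    *
    *    batch->cleared |= FD_BUFFER_COLOR;
    *    batch->invalidated |= FD_BUFFER_COLOR;   // skip mem2gmem restore
    */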

   /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
   bool nondraw : 1;
   bool needs_flush : 1;
   bool flushed : 1;
   bool tessellation : 1; /* tessellation used in batch */

   /* Keep track of whether WAIT_FOR_IDLE is needed for registers we need
    * to update via RMW:
    */
   bool needs_wfi : 1;

   /* To decide whether to render to system memory, keep track of the
    * number of draws, and whether any of them require multisample,
    * depth_test (or depth write), stencil_test, blending, and
    * color_logic_op (since those functions are disabled when bypassing
    * GMEM).
    */
   enum fd_gmem_reason gmem_reason;

   /* At submit time, once we've decided that this batch will use GMEM
    * rendering, the appropriate gmem state is looked up:
    */
   const struct fd_gmem_stateobj *gmem_state;

   /* A calculated "draw cost" value for the batch, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *      cost += num_mrt;
    *      if (blend_enabled)
    *        cost += num_mrt;
    *      if (depth_test_enabled)
    *        cost++;
    *      if (depth_write_enabled)
    *        cost++;
    *    }
    *
    * The idea is that each sample-passed minimally does one write
    * per MRT. If blend is enabled, the hw will additionally do
    * a framebuffer read per sample-passed (for each MRT with blend
    * enabled). If depth-test is enabled, the hw will additionally do
    * a depth buffer read. If depth-write is enabled, the hw will
    * additionally do a depth buffer write.
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details. But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
   unsigned cost;
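
   /* Worked example of the heuristic above: a draw to two MRTs with
    * blending enabled and depth test + write enabled would add
    * 2 + 2 + 1 + 1 = 6 to the cost.
    */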

   /* Tells the gen specific backend where to write stats used for
    * the autotune module.
    *
    * Pointer only valid during gmem emit code.
    */
   struct fd_batch_result *autotune_result;

   unsigned num_draws;    /* number of draws in current batch */
   unsigned num_vertices; /* number of vertices in current batch */

   /* Currently only used on a6xx, to calculate vsc prim/draw stream
    * sizes:
    */
   unsigned num_bins_per_pipe;
   unsigned prim_strm_bits;
   unsigned draw_strm_bits;

   /* Track the maximal bounds of the scissor of all the draws within a
    * batch. Used at the tile rendering step (fd_gmem_render_tiles(),
    * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
    */
   struct pipe_scissor_state max_scissor;

   /* Keep track of DRAW initiators that need to be patched up depending
    * on whether or not we are using binning:
    */
   struct util_dynarray draw_patches;

   /* texture state that needs patching for fb_read: */
   struct util_dynarray fb_read_patches;

   /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
    * once we know whether or not to use GMEM, and GMEM tile pitch.
    *
    * (only for a3xx.. but having gen specific subclasses of fd_batch
    * seemed overkill for now)
    */
   struct util_dynarray rbrc_patches;

   /* Keep track of GMEM related values that need to be patched up once we
    * know the gmem layout:
    */
   struct util_dynarray gmem_patches;

   /* Keep track of pointers to the start of MEM exports for a20x binning
    * shaders.
    *
    * This is so the end of the shader can be cut off at the right point,
    * depending on the GMEM configuration.
    */
   struct util_dynarray shader_patches;

   struct pipe_framebuffer_state framebuffer;

   struct fd_submit *submit;

   /** draw pass cmdstream: */
   struct fd_ringbuffer *draw;
   /** binning pass cmdstream: */
   struct fd_ringbuffer *binning;
   /** tiling/gmem (IB0) cmdstream: */
   struct fd_ringbuffer *gmem;

   /** preamble cmdstream (executed once before first tile): */
   struct fd_ringbuffer *prologue;

   /** epilogue cmdstream (executed after each tile): */
   struct fd_ringbuffer *epilogue;

   struct fd_ringbuffer *tile_setup;
   struct fd_ringbuffer *tile_fini;

   union pipe_color_union clear_color[MAX_RENDER_TARGETS];
   double clear_depth;
   unsigned clear_stencil;

   /**
    * hw query related state:
    */
   /*@{*/
   /* next sample offset.. incremented for each sample in the batch/
    * submit, reset to zero on next submit.
    */
   uint32_t next_sample_offset;

   /* cached samples (in case multiple queries need to reference
    * the same sample snapshot)
    */
   struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];

   /* which sample providers were used in the current batch: */
   uint32_t query_providers_used;

   /* which sample providers are currently enabled in the batch: */
   uint32_t query_providers_active;

   /* list of samples in current batch: */
   struct util_dynarray samples;

   /* current query result bo and tile stride: */
   struct pipe_resource *query_buf;
   uint32_t query_tile_stride;
   /*@}*/
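
   /* Illustrative sketch (hypothetical provider-side pseudocode, not part of
    * this header): a sample provider can reuse one snapshot for several
    * queries in the same batch via the cache above:
    *
    *    if (!batch->sample_cache[idx])
    *       batch->sample_cache[idx] = get_sample(batch, ring);  // provider hook
    *    batch->query_providers_used |= (1u << idx);
    */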

   /* Set of resources used by currently-unsubmitted batch (read or
    * write).. does not hold a reference to the resource.
    */
   struct set *resources;

   /** key in batch-cache (if not null): */
   struct fd_batch_key *key;
   uint32_t hash;

   /** set of dependent batches.. holds refs to dependent batches: */
   uint32_t dependents_mask;

   /* Buffer for tessellation engine input
    */
   struct fd_bo *tessfactor_bo;
   uint32_t tessfactor_size;

   /* Buffer for passing parameters between TCS and TES
    */
   struct fd_bo *tessparam_bo;
   uint32_t tessparam_size;

   struct fd_ringbuffer *tess_addrs_constobj;
};

struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);

void fd_batch_reset(struct fd_batch *batch) assert_dt;
void fd_batch_flush(struct fd_batch *batch) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_resource_write(struct fd_batch *batch,
                             struct fd_resource *rsc) assert_dt;
void fd_batch_resource_read_slowpath(struct fd_batch *batch,
                                     struct fd_resource *rsc) assert_dt;
void fd_batch_check_size(struct fd_batch *batch) assert_dt;

uint32_t fd_batch_key_hash(const void *_key);
bool fd_batch_key_equals(const void *_a, const void *_b);
struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
                                        const struct fd_batch_key *key);

/* not called directly: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
void __fd_batch_destroy(struct fd_batch *batch);

/*
 * NOTE the rule is, you need to hold the screen->lock when destroying
 * a batch.. so either use fd_batch_reference() (which grabs the lock
 * for you) if you don't hold the lock, or fd_batch_reference_locked()
 * if you do hold the lock.
 *
 * WARNING the _locked() version can briefly drop the lock. Without
 * recursive mutexes, I'm not sure there is much else we can do (since
 * __fd_batch_destroy() needs to unref resources)
 *
 * WARNING you must acquire the screen->lock and use the _locked()
 * version in case the batch being ref'd can disappear under you.
 */

static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;

   /* only need lock if a reference is dropped: */
   if (old_batch)
      fd_screen_assert_locked(old_batch->ctx->screen);

   if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
      __fd_batch_destroy(old_batch);

   *ptr = batch;
}

static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;
   struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;

   if (ctx)
      fd_screen_lock(ctx->screen);

   fd_batch_reference_locked(ptr, batch);

   if (ctx)
      fd_screen_unlock(ctx->screen);
}

static inline void
fd_batch_unlock_submit(struct fd_batch *batch)
{
   simple_mtx_unlock(&batch->submit_lock);
}
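
/* Illustrative usage sketches (hypothetical callers, not part of this
 * header):
 *
 * Replacing/dropping a batch reference without already holding screen->lock
 * (fd_batch_reference() takes the lock internally):
 *
 *    struct fd_batch *batch = NULL;
 *    fd_batch_reference(&batch, new_batch);
 *    ...
 *    fd_batch_reference(&batch, NULL);   // drop ref, may destroy the batch
 *
 * Guarding cmdstream emission against a concurrent flush with the
 * submit-lock helpers declared below:
 *
 *    if (fd_batch_lock_submit(batch)) {
 *       ... emit cmdstream ...
 *       fd_batch_unlock_submit(batch);
 *    }
 */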
348 */ 349static inline bool MUST_CHECK 350fd_batch_lock_submit(struct fd_batch *batch) 351{ 352 simple_mtx_lock(&batch->submit_lock); 353 bool ret = !batch->flushed; 354 if (!ret) 355 fd_batch_unlock_submit(batch); 356 return ret; 357} 358 359/** 360 * Mark the batch as having something worth flushing (rendering, blit, query, 361 * etc) 362 */ 363static inline void 364fd_batch_needs_flush(struct fd_batch *batch) 365{ 366 batch->needs_flush = true; 367 fd_fence_ref(&batch->ctx->last_fence, NULL); 368} 369 370/* Since we reorder batches and can pause/resume queries (notably for disabling 371 * queries dueing some meta operations), we update the current query state for 372 * the batch before each draw. 373 */ 374static inline void 375fd_batch_update_queries(struct fd_batch *batch) assert_dt 376{ 377 struct fd_context *ctx = batch->ctx; 378 379 if (ctx->query_update_batch) 380 ctx->query_update_batch(batch, false); 381} 382 383static inline void 384fd_batch_finish_queries(struct fd_batch *batch) assert_dt 385{ 386 struct fd_context *ctx = batch->ctx; 387 388 if (ctx->query_update_batch) 389 ctx->query_update_batch(batch, true); 390} 391 392static inline void 393fd_reset_wfi(struct fd_batch *batch) 394{ 395 batch->needs_wfi = true; 396} 397 398void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt; 399 400/* emit a CP_EVENT_WRITE: 401 */ 402static inline void 403fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring, 404 enum vgt_event_type evt) 405{ 406 OUT_PKT3(ring, CP_EVENT_WRITE, 1); 407 OUT_RING(ring, evt); 408 fd_reset_wfi(batch); 409} 410 411/* Get per-tile epilogue */ 412static inline struct fd_ringbuffer * 413fd_batch_get_epilogue(struct fd_batch *batch) 414{ 415 if (batch->epilogue == NULL) { 416 batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000, 417 (enum fd_ringbuffer_flags)0); 418 } 419 420 return batch->epilogue; 421} 422 423struct fd_ringbuffer *fd_batch_get_prologue(struct fd_batch *batch); 424 425#ifdef __cplusplus 426} 427#endif 428 429#endif /* FREEDRENO_BATCH_H_ */ 430