/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"

#include "drm-uapi/i915_drm.h"
#include "common/intel_decoder.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining). Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control. In the worst (chaining) case that adds up to
 * 12 + 24 + 24 = 60 bytes.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
};

/* Number of batch types in the enum above; used to size other_batches[]. */
#define IRIS_BATCH_COUNT 2
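
/* Illustrative sketch (not part of the original header): the relationship
 * between the sizes above, written as compile-time checks. BATCH_SZ plus the
 * reserved footer adds up to a 64kB batch, which stays well under the
 * kernel's assumed 256kB maximum. Kept under "#if 0" since the header does
 * not actually carry these asserts.
 */
#if 0
_Static_assert(BATCH_SZ + BATCH_RESERVED == 64 * 1024,
               "target size plus reserved footer fills a 64kB batch");
_Static_assert(BATCH_SZ + BATCH_RESERVED <= MAX_BATCH_SIZE,
               "batches stay within the kernel's assumed limit");
#endif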

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct pipe_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last Surface State Base Address set in this hardware context. */
   uint64_t last_surface_base_address;

   uint32_t hw_ctx_id;

   /** A list of all BOs referenced by this batch. */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e. the
    * first instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed. The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on. */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos. */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers. */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO. */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];

   struct {
      /**
       * Set of struct iris_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;
   } cache;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch. For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i). This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was
    * previously accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without
    * a matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;
};
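
/* Illustrative sketch (not part of the original header): how the
 * coherent_seqnos[][] matrix above could be consulted. The helper name and
 * the per-BO "last write seqno from domain j" parameter are hypothetical;
 * they stand in for whatever bookkeeping the buffer manager keeps. Kept
 * under "#if 0" so it is not compiled.
 */
#if 0
static inline bool
example_read_needs_sync(const struct iris_batch *batch,
                        enum iris_domain reader,   /* cache domain i */
                        enum iris_domain writer,   /* cache domain j */
                        uint64_t last_write_seqno) /* hypothetical per-BO data */
{
   /* Data written by `writer` is already visible to `reader` only if a
    * flush of `writer` at least as new as that write has reached `reader`.
    */
   return last_write_seqno > batch->coherent_seqnos[reader][writer];
}
#endif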

void iris_init_batch(struct iris_context *ice,
                     enum iris_batch_name name,
                     int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_batch_free(struct iris_batch *batch);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining. If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
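
/* Illustrative sketch (not part of the original header): two equivalent ways
 * a caller might emit a small command packet with the helpers above. The
 * function name and the all-MI_NOOP payload are made up for illustration.
 * Kept under "#if 0" so it is not compiled.
 */
#if 0
static void
example_emit_noops(struct iris_batch *batch)
{
   /* Option 1: reserve space and write the DWords in place. */
   uint32_t *dw = iris_get_command_space(batch, 4 * sizeof(uint32_t));
   for (unsigned i = 0; i < 4; i++)
      dw[i] = 0; /* MI_NOOP */

   /* Option 2: build the packet locally and copy it into the batch. */
   const uint32_t packet[4] = { 0, 0, 0, 0 }; /* MI_NOOPs */
   iris_batch_emit(batch, packet, sizeof(packet));
}
#endif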

/**
 * Get a pointer to the batch's signalling syncobj. Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}


/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with a stable synchronization
 * sequence number. Any buffer object accessed by the batch buffer only
 * needs to be marked once (e.g. via iris_bo_bump_seqno()) within a region
 * delimited by iris_batch_sync_region_start() and
 * iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with a stable synchronization
 * sequence number. Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain. All prior flushes of other caches will
 * be considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
      batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
}

/**
 * Update the cache coherency status of the batch to reflect a reset. All
 * previously accessed data can be considered visible to every caching
 * domain thanks to the kernel's heavyweight flushing at batch buffer
 * boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
}

#endif
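
/* Illustrative sketch (not part of the original header): the intended
 * bracketing of a sync region, as described above
 * iris_batch_sync_region_start(). The function name, the BO, and the use of
 * IRIS_DOMAIN_RENDER_WRITE as the access domain are assumptions for
 * illustration. Kept under "#if 0" so it is not compiled.
 */
#if 0
static void
example_draw_like_operation(struct iris_batch *batch, struct iris_bo *bo)
{
   iris_batch_sync_region_start(batch);

   /* Mark each BO accessed by the commands once within the region. */
   iris_use_pinned_bo(batch, bo, true /* writable */, IRIS_DOMAIN_RENDER_WRITE);

   /* Emit the actual commands (MI_NOOP as a stand-in). */
   const uint32_t noop = 0;
   iris_batch_emit(batch, &noop, sizeof(noop));

   iris_batch_sync_region_end(batch);
}
#endif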