17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2017 Intel Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#ifndef CROCUS_BATCH_DOT_H 257ec681f3Smrg#define CROCUS_BATCH_DOT_H 267ec681f3Smrg 277ec681f3Smrg#include <stdbool.h> 287ec681f3Smrg#include <stdint.h> 297ec681f3Smrg#include <string.h> 307ec681f3Smrg 317ec681f3Smrg#include "util/u_dynarray.h" 327ec681f3Smrg 337ec681f3Smrg#include "common/intel_decoder.h" 347ec681f3Smrg#include "drm-uapi/i915_drm.h" 357ec681f3Smrg 367ec681f3Smrg#include "crocus_fence.h" 377ec681f3Smrg#include "crocus_fine_fence.h" 387ec681f3Smrg 397ec681f3Smrg#include "crocus_bufmgr.h" 407ec681f3Smrg/* The kernel assumes batchbuffers are smaller than 256kB. */ 417ec681f3Smrg#define MAX_BATCH_SIZE (256 * 1024) 427ec681f3Smrg 437ec681f3Smrg/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base 447ec681f3Smrg * Address, which means that we can't put binding tables beyond 64kB. This 457ec681f3Smrg * effectively limits the maximum statebuffer size to 64kB. 467ec681f3Smrg */ 477ec681f3Smrg#define MAX_STATE_SIZE (64 * 1024) 487ec681f3Smrg 497ec681f3Smrg/* Our target batch size - flush approximately at this point. */ 507ec681f3Smrg#define BATCH_SZ (20 * 1024) 517ec681f3Smrg#define STATE_SZ (16 * 1024) 527ec681f3Smrg 537ec681f3Smrgenum crocus_batch_name { 547ec681f3Smrg CROCUS_BATCH_RENDER, 557ec681f3Smrg CROCUS_BATCH_COMPUTE, 567ec681f3Smrg}; 577ec681f3Smrg 587ec681f3Smrg#define CROCUS_BATCH_COUNT 2 597ec681f3Smrg 607ec681f3Smrgstruct crocus_address { 617ec681f3Smrg struct crocus_bo *bo; 627ec681f3Smrg int32_t offset; 637ec681f3Smrg uint32_t reloc_flags; 647ec681f3Smrg}; 657ec681f3Smrg 667ec681f3Smrgstruct crocus_reloc_list { 677ec681f3Smrg struct drm_i915_gem_relocation_entry *relocs; 687ec681f3Smrg int reloc_count; 697ec681f3Smrg int reloc_array_size; 707ec681f3Smrg}; 717ec681f3Smrg 727ec681f3Smrgstruct crocus_growing_bo { 737ec681f3Smrg struct crocus_bo *bo; 747ec681f3Smrg void *map; 757ec681f3Smrg void *map_next; 767ec681f3Smrg struct crocus_bo *partial_bo; 777ec681f3Smrg void *partial_bo_map; 787ec681f3Smrg unsigned partial_bytes; 797ec681f3Smrg struct crocus_reloc_list relocs; 807ec681f3Smrg unsigned used; 817ec681f3Smrg}; 827ec681f3Smrg 837ec681f3Smrgstruct crocus_batch { 847ec681f3Smrg struct crocus_context *ice; 857ec681f3Smrg struct crocus_screen *screen; 867ec681f3Smrg struct pipe_debug_callback *dbg; 877ec681f3Smrg struct pipe_device_reset_callback *reset; 887ec681f3Smrg 897ec681f3Smrg /** What batch is this? (e.g. CROCUS_BATCH_RENDER/COMPUTE) */ 907ec681f3Smrg enum crocus_batch_name name; 917ec681f3Smrg 927ec681f3Smrg /** buffers: command, state */ 937ec681f3Smrg struct crocus_growing_bo command, state; 947ec681f3Smrg 957ec681f3Smrg /** Size of the primary batch if we've moved on to a secondary. */ 967ec681f3Smrg unsigned primary_batch_size; 977ec681f3Smrg 987ec681f3Smrg bool state_base_address_emitted; 997ec681f3Smrg uint8_t pipe_controls_since_last_cs_stall; 1007ec681f3Smrg 1017ec681f3Smrg uint32_t hw_ctx_id; 1027ec681f3Smrg 1037ec681f3Smrg uint32_t valid_reloc_flags; 1047ec681f3Smrg 1057ec681f3Smrg bool use_shadow_copy; 1067ec681f3Smrg bool no_wrap; 1077ec681f3Smrg 1087ec681f3Smrg /** The validation list */ 1097ec681f3Smrg struct drm_i915_gem_exec_object2 *validation_list; 1107ec681f3Smrg struct crocus_bo **exec_bos; 1117ec681f3Smrg int exec_count; 1127ec681f3Smrg int exec_array_size; 1137ec681f3Smrg 1147ec681f3Smrg /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first 1157ec681f3Smrg * instruction is a MI_BATCH_BUFFER_END). 1167ec681f3Smrg */ 1177ec681f3Smrg bool noop_enabled; 1187ec681f3Smrg 1197ec681f3Smrg /** 1207ec681f3Smrg * A list of crocus_syncobjs associated with this batch. 1217ec681f3Smrg * 1227ec681f3Smrg * The first list entry will always be a signalling sync-point, indicating 1237ec681f3Smrg * that this batch has completed. The others are likely to be sync-points 1247ec681f3Smrg * to wait on before executing the batch. 1257ec681f3Smrg */ 1267ec681f3Smrg struct util_dynarray syncobjs; 1277ec681f3Smrg 1287ec681f3Smrg /** A list of drm_i915_exec_fences to have execbuf signal or wait on */ 1297ec681f3Smrg struct util_dynarray exec_fences; 1307ec681f3Smrg 1317ec681f3Smrg /** The amount of aperture space (in bytes) used by all exec_bos */ 1327ec681f3Smrg int aperture_space; 1337ec681f3Smrg 1347ec681f3Smrg struct { 1357ec681f3Smrg /** Uploader to use for sequence numbers */ 1367ec681f3Smrg struct u_upload_mgr *uploader; 1377ec681f3Smrg 1387ec681f3Smrg /** GPU buffer and CPU map where our seqno's will be written. */ 1397ec681f3Smrg struct crocus_state_ref ref; 1407ec681f3Smrg uint32_t *map; 1417ec681f3Smrg 1427ec681f3Smrg /** The sequence number to write the next time we add a fence. */ 1437ec681f3Smrg uint32_t next; 1447ec681f3Smrg } fine_fences; 1457ec681f3Smrg 1467ec681f3Smrg /** A seqno (and syncobj) for the last batch that was submitted. */ 1477ec681f3Smrg struct crocus_fine_fence *last_fence; 1487ec681f3Smrg 1497ec681f3Smrg /** List of other batches which we might need to flush to use a BO */ 1507ec681f3Smrg struct crocus_batch *other_batches[CROCUS_BATCH_COUNT - 1]; 1517ec681f3Smrg 1527ec681f3Smrg struct { 1537ec681f3Smrg /** 1547ec681f3Smrg * Set of struct brw_bo * that have been rendered to within this 1557ec681f3Smrg * batchbuffer and would need flushing before being used from another 1567ec681f3Smrg * cache domain that isn't coherent with it (i.e. the sampler). 1577ec681f3Smrg */ 1587ec681f3Smrg struct hash_table *render; 1597ec681f3Smrg 1607ec681f3Smrg /** 1617ec681f3Smrg * Set of struct brw_bo * that have been used as a depth buffer within 1627ec681f3Smrg * this batchbuffer and would need flushing before being used from 1637ec681f3Smrg * another cache domain that isn't coherent with it (i.e. the sampler). 1647ec681f3Smrg */ 1657ec681f3Smrg struct set *depth; 1667ec681f3Smrg } cache; 1677ec681f3Smrg 1687ec681f3Smrg struct intel_batch_decode_ctx decoder; 1697ec681f3Smrg struct hash_table_u64 *state_sizes; 1707ec681f3Smrg 1717ec681f3Smrg /** Have we emitted any draw calls to this batch? */ 1727ec681f3Smrg bool contains_draw; 1737ec681f3Smrg 1747ec681f3Smrg /** Batch contains fence signal operation. */ 1757ec681f3Smrg bool contains_fence_signal; 1767ec681f3Smrg}; 1777ec681f3Smrg 1787ec681f3Smrgstatic inline bool 1797ec681f3Smrgbatch_has_fine_fence(struct crocus_batch *batch) 1807ec681f3Smrg{ 1817ec681f3Smrg return !!batch->fine_fences.uploader; 1827ec681f3Smrg} 1837ec681f3Smrg 1847ec681f3Smrg#define BATCH_HAS_FINE_FENCES(batch) (!!(batch)->fine_fences.uploader) 1857ec681f3Smrgvoid crocus_init_batch(struct crocus_context *ctx, 1867ec681f3Smrg enum crocus_batch_name name, 1877ec681f3Smrg int priority); 1887ec681f3Smrgvoid crocus_batch_free(struct crocus_batch *batch); 1897ec681f3Smrgvoid crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate); 1907ec681f3Smrg 1917ec681f3Smrgvoid _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line); 1927ec681f3Smrg#define crocus_batch_flush(batch) _crocus_batch_flush((batch), __FILE__, __LINE__) 1937ec681f3Smrg 1947ec681f3Smrgbool crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo); 1957ec681f3Smrg 1967ec681f3Smrgbool crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable); 1977ec681f3Smrg 1987ec681f3Smrg#define RELOC_WRITE EXEC_OBJECT_WRITE 1997ec681f3Smrg#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT 2007ec681f3Smrg/* Inverted meaning, but using the same bit...emit_reloc will flip it. */ 2017ec681f3Smrg#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS 2027ec681f3Smrg 2037ec681f3Smrgvoid crocus_use_pinned_bo(struct crocus_batch *batch, struct crocus_bo *bo, 2047ec681f3Smrg bool writable); 2057ec681f3Smrguint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset, 2067ec681f3Smrg struct crocus_bo *target, uint32_t target_offset, 2077ec681f3Smrg unsigned int reloc_flags); 2087ec681f3Smrguint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset, 2097ec681f3Smrg struct crocus_bo *target, uint32_t target_offset, 2107ec681f3Smrg unsigned int reloc_flags); 2117ec681f3Smrg 2127ec681f3Smrgenum pipe_reset_status crocus_batch_check_for_reset(struct crocus_batch *batch); 2137ec681f3Smrg 2147ec681f3Smrgvoid crocus_grow_buffer(struct crocus_batch *batch, bool grow_state, 2157ec681f3Smrg unsigned used, unsigned new_size); 2167ec681f3Smrg 2177ec681f3Smrgstatic inline unsigned 2187ec681f3Smrgcrocus_batch_bytes_used(struct crocus_batch *batch) 2197ec681f3Smrg{ 2207ec681f3Smrg return batch->command.map_next - batch->command.map; 2217ec681f3Smrg} 2227ec681f3Smrg 2237ec681f3Smrg/** 2247ec681f3Smrg * Ensure the current command buffer has \param size bytes of space 2257ec681f3Smrg * remaining. If not, this creates a secondary batch buffer and emits 2267ec681f3Smrg * a jump from the primary batch to the start of the secondary. 2277ec681f3Smrg * 2287ec681f3Smrg * Most callers want crocus_get_command_space() instead. 2297ec681f3Smrg */ 2307ec681f3Smrgstatic inline void 2317ec681f3Smrgcrocus_require_command_space(struct crocus_batch *batch, unsigned size) 2327ec681f3Smrg{ 2337ec681f3Smrg const unsigned required_bytes = crocus_batch_bytes_used(batch) + size; 2347ec681f3Smrg unsigned used = crocus_batch_bytes_used(batch); 2357ec681f3Smrg if (required_bytes >= BATCH_SZ && !batch->no_wrap) { 2367ec681f3Smrg crocus_batch_flush(batch); 2377ec681f3Smrg } else if (used + size >= batch->command.bo->size) { 2387ec681f3Smrg const unsigned new_size = 2397ec681f3Smrg MIN2(batch->command.bo->size + batch->command.bo->size / 2, 2407ec681f3Smrg MAX_BATCH_SIZE); 2417ec681f3Smrg 2427ec681f3Smrg crocus_grow_buffer(batch, false, used, new_size); 2437ec681f3Smrg batch->command.map_next = (void *)batch->command.map + used; 2447ec681f3Smrg assert(crocus_batch_bytes_used(batch) + size < batch->command.bo->size); 2457ec681f3Smrg } 2467ec681f3Smrg} 2477ec681f3Smrg 2487ec681f3Smrg/** 2497ec681f3Smrg * Allocate space in the current command buffer, and return a pointer 2507ec681f3Smrg * to the mapped area so the caller can write commands there. 2517ec681f3Smrg * 2527ec681f3Smrg * This should be called whenever emitting commands. 2537ec681f3Smrg */ 2547ec681f3Smrgstatic inline void * 2557ec681f3Smrgcrocus_get_command_space(struct crocus_batch *batch, unsigned bytes) 2567ec681f3Smrg{ 2577ec681f3Smrg crocus_require_command_space(batch, bytes); 2587ec681f3Smrg void *map = batch->command.map_next; 2597ec681f3Smrg batch->command.map_next += bytes; 2607ec681f3Smrg return map; 2617ec681f3Smrg} 2627ec681f3Smrg 2637ec681f3Smrg/** 2647ec681f3Smrg * Helper to emit GPU commands - allocates space, copies them there. 2657ec681f3Smrg */ 2667ec681f3Smrgstatic inline void 2677ec681f3Smrgcrocus_batch_emit(struct crocus_batch *batch, const void *data, unsigned size) 2687ec681f3Smrg{ 2697ec681f3Smrg void *map = crocus_get_command_space(batch, size); 2707ec681f3Smrg memcpy(map, data, size); 2717ec681f3Smrg} 2727ec681f3Smrg 2737ec681f3Smrg/** 2747ec681f3Smrg * Get a pointer to the batch's signalling syncobj. Does not refcount. 2757ec681f3Smrg */ 2767ec681f3Smrgstatic inline struct crocus_syncobj * 2777ec681f3Smrgcrocus_batch_get_signal_syncobj(struct crocus_batch *batch) 2787ec681f3Smrg{ 2797ec681f3Smrg /* The signalling syncobj is the first one in the list. */ 2807ec681f3Smrg struct crocus_syncobj *syncobj = 2817ec681f3Smrg ((struct crocus_syncobj **)util_dynarray_begin(&batch->syncobjs))[0]; 2827ec681f3Smrg return syncobj; 2837ec681f3Smrg} 2847ec681f3Smrg 2857ec681f3Smrg/** 2867ec681f3Smrg * Take a reference to the batch's signalling syncobj. 2877ec681f3Smrg * 2887ec681f3Smrg * Callers can use this to wait for the the current batch under construction 2897ec681f3Smrg * to complete (after flushing it). 2907ec681f3Smrg */ 2917ec681f3Smrgstatic inline void 2927ec681f3Smrgcrocus_batch_reference_signal_syncobj(struct crocus_batch *batch, 2937ec681f3Smrg struct crocus_syncobj **out_syncobj) 2947ec681f3Smrg{ 2957ec681f3Smrg struct crocus_syncobj *syncobj = crocus_batch_get_signal_syncobj(batch); 2967ec681f3Smrg crocus_syncobj_reference(batch->screen, out_syncobj, syncobj); 2977ec681f3Smrg} 2987ec681f3Smrg 2997ec681f3Smrg/** 3007ec681f3Smrg * Record the size of a piece of state for use in INTEL_DEBUG=bat printing. 3017ec681f3Smrg */ 3027ec681f3Smrgstatic inline void 3037ec681f3Smrgcrocus_record_state_size(struct hash_table_u64 *ht, uint32_t offset_from_base, 3047ec681f3Smrg uint32_t size) 3057ec681f3Smrg{ 3067ec681f3Smrg if (ht) { 3077ec681f3Smrg _mesa_hash_table_u64_insert(ht, offset_from_base, 3087ec681f3Smrg (void *)(uintptr_t)size); 3097ec681f3Smrg } 3107ec681f3Smrg} 3117ec681f3Smrg 3127ec681f3Smrgstatic inline bool 3137ec681f3Smrgcrocus_ptr_in_state_buffer(struct crocus_batch *batch, void *p) 3147ec681f3Smrg{ 3157ec681f3Smrg return (char *)p >= (char *)batch->state.map && 3167ec681f3Smrg (char *)p < (char *)batch->state.map + batch->state.bo->size; 3177ec681f3Smrg} 3187ec681f3Smrg 3197ec681f3Smrgstatic inline void 3207ec681f3Smrgcrocus_require_statebuffer_space(struct crocus_batch *batch, int size) 3217ec681f3Smrg{ 3227ec681f3Smrg if (batch->state.used + size >= STATE_SZ) 3237ec681f3Smrg crocus_batch_flush(batch); 3247ec681f3Smrg} 3257ec681f3Smrg#endif 326