/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#ifndef AMDGPU_CS_H
#define AMDGPU_CS_H

#include "amdgpu_bo.h"
#include "util/u_memory.h"
#include "drm-uapi/amdgpu_drm.h"

/* Smaller submits mean the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
#define IB_MAX_SUBMIT_DWORDS (20 * 1024)

/* A submission context: wraps a libdrm amdgpu context handle plus the
 * user-fence BO whose CPU mapping is polled to observe fence completion.
 * Reference-counted; released via amdgpu_ctx_unref() below.
 */
struct amdgpu_ctx {
   struct amdgpu_winsys *ws;
   amdgpu_context_handle ctx;
   amdgpu_bo_handle user_fence_bo;
   /* CPU mapping of user_fence_bo; per-fence addresses are derived from
    * this base (see amdgpu_fence::user_fence_cpu_address). */
   uint64_t *user_fence_cpu_address_base;
   /* Atomic reference count; see amdgpu_ctx_unref(). */
   int refcount;
   unsigned initial_num_total_rejected_cs;
   unsigned num_rejected_cs;
};

/* One buffer referenced by a CS, with how the CS uses it. */
struct amdgpu_cs_buffer {
   struct amdgpu_winsys_bo *bo;
   union {
      struct {
         uint32_t priority_usage;
      } real;
      struct {
         uint32_t real_idx; /* index of underlying real BO */
      } slab;
   } u;
   enum radeon_bo_usage usage;
};

/* The IBs a single CS can carry: an optional preamble plus the main IB. */
enum ib_type {
   IB_PREAMBLE,
   IB_MAIN,
   IB_NUM,
};

/* State for one indirect buffer: commands are suballocated out of
 * big_ib_buffer and written through ib_mapped.
 */
struct amdgpu_ib {
   struct radeon_cmdbuf *rcs; /* pointer to the driver-owned data */

   /* A buffer out of which new IBs are allocated. */
   struct pb_buffer *big_ib_buffer;
   uint8_t *ib_mapped;
   unsigned used_ib_space;

   /* The maximum seen size from cs_check_space. If the driver does
    * cs_check_space and flush, the newly allocated IB should have at least
    * this size.
    */
   unsigned max_check_space_size;

   unsigned max_ib_size;
   /* Location where this IB's size in dwords is patched in at flush time. */
   uint32_t *ptr_ib_size;
   /* True if ptr_ib_size points into the IB itself (chained IBs). */
   bool ptr_ib_size_inside_ib;
   enum ib_type ib_type;
};

/* A growable array of fence handles (num used out of max allocated). */
struct amdgpu_fence_list {
   struct pipe_fence_handle **list;
   unsigned num;
   unsigned max;
};

/* All per-submission state: the IB chunks, the three buffer lists
 * (real / slab / sparse), dependencies, and the resulting fence.
 * Two of these exist per amdgpu_cs and are flipped at flush (see
 * amdgpu_cs::csc / cst).
 */
struct amdgpu_cs_context {
   struct drm_amdgpu_cs_chunk_ib ib[IB_NUM];
   uint32_t *ib_main_addr; /* the beginning of IB before chaining */

   /* Buffers.
    */
   unsigned max_real_buffers;
   unsigned num_real_buffers;
   struct amdgpu_cs_buffer *real_buffers;

   unsigned num_slab_buffers;
   unsigned max_slab_buffers;
   struct amdgpu_cs_buffer *slab_buffers;

   unsigned num_sparse_buffers;
   unsigned max_sparse_buffers;
   struct amdgpu_cs_buffer *sparse_buffers;

   /* Points at the owning amdgpu_cs's hashlist (see amdgpu_cs below). */
   int16_t *buffer_indices_hashlist;

   /* Cache of the most recently added BO, to short-circuit lookups. */
   struct amdgpu_winsys_bo *last_added_bo;
   unsigned last_added_bo_index;
   unsigned last_added_bo_usage;
   uint32_t last_added_bo_priority_usage;

   struct amdgpu_fence_list fence_dependencies;
   struct amdgpu_fence_list syncobj_dependencies;
   struct amdgpu_fence_list syncobj_to_signal;

   struct pipe_fence_handle *fence;

   /* the error returned from cs_flush for non-async submissions */
   int error_code;

   /* TMZ: will this command be submitted using the TMZ flag */
   bool secure;
};

#define BUFFER_HASHLIST_SIZE 4096

/* A command stream: the main IB plus double-buffered submission state so
 * the pipe driver can record one context while the submit thread consumes
 * the other.
 */
struct amdgpu_cs {
   struct amdgpu_ib main; /* must be first because this is inherited */
   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;
   enum ring_type ring_type;
   struct drm_amdgpu_cs_chunk_fence fence_chunk;

   /* We flip between these two CS. While one is being consumed
    * by the kernel in another thread, the other one is being filled
    * by the pipe driver. */
   struct amdgpu_cs_context csc1;
   struct amdgpu_cs_context csc2;
   /* The currently-used CS. */
   struct amdgpu_cs_context *csc;
   /* The CS being currently-owned by the other thread. */
   struct amdgpu_cs_context *cst;
   /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo
    * isn't part of any buffer lists or the index where the bo could be found.
    * Since 1) hash collisions of 2 different bo can happen and 2) we use a
    * single hashlist for the 3 buffer list, this is only a hint.
    * amdgpu_lookup_buffer uses this hint to speed up buffers look up.
    */
   int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];

   /* Flush CS. */
   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
   void *flush_data;
   bool stop_exec_on_failure;
   bool noop;
   bool has_chaining;

   struct util_queue_fence flush_completed;
   struct pipe_fence_handle *next_fence;
   struct pb_buffer *preamble_ib_bo;
};

/* A reference-counted fence. Backed either by a submission context
 * (ctx != NULL) or by a DRM syncobj (ctx == NULL).
 */
struct amdgpu_fence {
   struct pipe_reference reference;
   /* If ctx == NULL, this fence is syncobj-based. */
   uint32_t syncobj;

   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx; /* submission context */
   struct amdgpu_cs_fence fence;
   uint64_t *user_fence_cpu_address;

   /* If the fence has been submitted. This is unsignalled for deferred fences
    * (cs->next_fence) and while an IB is still being submitted in the submit
    * thread. */
   struct util_queue_fence submitted;

   volatile int signalled; /* bool (int for atomicity) */
};

/* Return true if the fence is backed by a syncobj rather than a context. */
static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
{
   return fence->ctx == NULL;
}

/* Drop a reference to a context; the last reference frees the libdrm
 * context, the user-fence BO, and the structure itself.
 */
static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
{
   if (p_atomic_dec_zero(&ctx->refcount)) {
      amdgpu_cs_ctx_free(ctx->ctx);
      amdgpu_bo_free(ctx->user_fence_bo);
      FREE(ctx);
   }
}

/* Make *dst point at src, adjusting reference counts. If the old *dst
 * loses its last reference, destroy it: syncobj-based fences release the
 * syncobj, context-based fences drop their context reference.
 */
static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
                                          struct pipe_fence_handle *src)
{
   struct amdgpu_fence **adst = (struct amdgpu_fence **)dst;
   struct amdgpu_fence *asrc = (struct amdgpu_fence *)src;

   if (pipe_reference(&(*adst)->reference, &asrc->reference)) {
      struct amdgpu_fence *fence = *adst;

      if (amdgpu_fence_is_syncobj(fence))
         amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
      else
         amdgpu_ctx_unref(fence->ctx);

      util_queue_fence_destroy(&fence->submitted);
      FREE(fence);
   }
   *adst = asrc;
}

/* Find bo in any of cs's three buffer lists; returns -1 if not present.
 * (Defined in the corresponding .c file.)
 */
int amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs,
                                  struct amdgpu_winsys_bo *bo);

/* Downcast a radeon_cmdbuf to its owning amdgpu_cs (may be NULL). */
static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv;
   assert(!cs || cs->main.ib_type == IB_MAIN);
   return cs;
}

/* container_of equivalent: recover the enclosing struct from a member ptr. */
#define get_container(member_ptr, container_type, container_member) \
   (container_type *)((char *)(member_ptr) - offsetof(container_type, container_member))

/* Return whether the current submission state references bo at all. */
static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1;
}

/* Return whether the current submission state references bo with any of
 * the given usage bits. The looked-up index is interpreted against the
 * list matching the BO's kind: real (bo->bo set), sparse, or slab.
 */
static inline bool
amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
                                         struct amdgpu_winsys_bo *bo,
                                         enum radeon_bo_usage usage)
{
   int index;
   struct amdgpu_cs_buffer *buffer;

   index = amdgpu_lookup_buffer_any_type(cs->csc, bo);
   if (index == -1)
      return false;

   buffer = bo->bo ? &cs->csc->real_buffers[index] :
            bo->base.usage & RADEON_FLAG_SPARSE ? &cs->csc->sparse_buffers[index] :
            &cs->csc->slab_buffers[index];

   return (buffer->usage & usage) != 0;
}

bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute);
void amdgpu_add_fences(struct amdgpu_winsys_bo *bo,
                       unsigned num_fences,
                       struct pipe_fence_handle **fences);
void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs);
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws);

#endif