/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include <assert.h> 28#include <inttypes.h> 29 30#include "util/hash_table.h" 31#include "util/slab.h" 32 33#include "drm/freedreno_ringbuffer.h" 34#include "msm_priv.h" 35 36/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead 37 * by avoiding the additional tracking necessary to build cmds/relocs tables 38 * (but still builds a bos table) 39 */ 40 41 42#define INIT_SIZE 0x1000 43 44static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; 45 46 47struct msm_submit_sp { 48 struct fd_submit base; 49 50 DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); 51 DECLARE_ARRAY(struct fd_bo *, bos); 52 53 unsigned seqno; 54 55 /* maps fd_bo to idx in bos table: */ 56 struct hash_table *bo_table; 57 58 struct slab_mempool ring_pool; 59 60 struct fd_ringbuffer *primary; 61 62 /* Allow for sub-allocation of stateobj ring buffers (ie. sharing 63 * the same underlying bo).. 64 * 65 * We also rely on previous stateobj having been fully constructed 66 * so we can reclaim extra space at it's end. 67 */ 68 struct fd_ringbuffer *suballoc_ring; 69}; 70FD_DEFINE_CAST(fd_submit, msm_submit_sp); 71 72/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers 73 * and sizes. Ie. a finalized buffer can have no more commands appended to 74 * it. 75 */ 76struct msm_cmd_sp { 77 struct fd_bo *ring_bo; 78 unsigned size; 79}; 80 81/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to 82 * later copy into the submit when the stateobj rb is later referenced by 83 * a regular rb: 84 */ 85struct msm_reloc_bo_sp { 86 struct fd_bo *bo; 87 unsigned flags; 88}; 89 90struct msm_ringbuffer_sp { 91 struct fd_ringbuffer base; 92 93 /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ 94 unsigned offset; 95 96// TODO check disasm.. 
hopefully compilers CSE can realize that 97// reloc_bos and cmds are at the same offsets and optimize some 98// divergent cases into single case 99 union { 100 /* for _FD_RINGBUFFER_OBJECT case: */ 101 struct { 102 struct fd_pipe *pipe; 103 DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos); 104 }; 105 /* for other cases: */ 106 struct { 107 struct fd_submit *submit; 108 DECLARE_ARRAY(struct msm_cmd_sp, cmds); 109 }; 110 } u; 111 112 struct fd_bo *ring_bo; 113}; 114FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp); 115 116static void finalize_current_cmd(struct fd_ringbuffer *ring); 117static struct fd_ringbuffer * msm_ringbuffer_sp_init( 118 struct msm_ringbuffer_sp *msm_ring, 119 uint32_t size, enum fd_ringbuffer_flags flags); 120 121/* add (if needed) bo to submit and return index: */ 122static uint32_t 123append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags) 124{ 125 struct msm_bo *msm_bo = to_msm_bo(bo); 126 uint32_t idx; 127 pthread_mutex_lock(&idx_lock); 128 if (likely(msm_bo->current_submit_seqno == submit->seqno)) { 129 idx = msm_bo->idx; 130 } else { 131 uint32_t hash = _mesa_hash_pointer(bo); 132 struct hash_entry *entry; 133 134 entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); 135 if (entry) { 136 /* found */ 137 idx = (uint32_t)(uintptr_t)entry->data; 138 } else { 139 idx = APPEND(submit, submit_bos); 140 idx = APPEND(submit, bos); 141 142 submit->submit_bos[idx].flags = 0; 143 submit->submit_bos[idx].handle = bo->handle; 144 submit->submit_bos[idx].presumed = 0; 145 146 submit->bos[idx] = fd_bo_ref(bo); 147 148 _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, 149 (void *)(uintptr_t)idx); 150 } 151 msm_bo->current_submit_seqno = submit->seqno; 152 msm_bo->idx = idx; 153 } 154 pthread_mutex_unlock(&idx_lock); 155 if (flags & FD_RELOC_READ) 156 submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; 157 if (flags & FD_RELOC_WRITE) 158 submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; 159 if (flags & 
FD_RELOC_DUMP) 160 submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP; 161 return idx; 162} 163 164static void 165msm_submit_suballoc_ring_bo(struct fd_submit *submit, 166 struct msm_ringbuffer_sp *msm_ring, uint32_t size) 167{ 168 struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); 169 unsigned suballoc_offset = 0; 170 struct fd_bo *suballoc_bo = NULL; 171 172 if (msm_submit->suballoc_ring) { 173 struct msm_ringbuffer_sp *suballoc_ring = 174 to_msm_ringbuffer_sp(msm_submit->suballoc_ring); 175 176 suballoc_bo = suballoc_ring->ring_bo; 177 suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + 178 suballoc_ring->offset; 179 180 suballoc_offset = align(suballoc_offset, 0x10); 181 182 if ((size + suballoc_offset) > suballoc_bo->size) { 183 suballoc_bo = NULL; 184 } 185 } 186 187 if (!suballoc_bo) { 188 // TODO possibly larger size for streaming bo? 189 msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, 190 0x8000, DRM_FREEDRENO_GEM_GPUREADONLY); 191 msm_ring->offset = 0; 192 } else { 193 msm_ring->ring_bo = fd_bo_ref(suballoc_bo); 194 msm_ring->offset = suballoc_offset; 195 } 196 197 struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; 198 199 msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); 200 201 if (old_suballoc_ring) 202 fd_ringbuffer_del(old_suballoc_ring); 203} 204 205static struct fd_ringbuffer * 206msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, 207 enum fd_ringbuffer_flags flags) 208{ 209 struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); 210 struct msm_ringbuffer_sp *msm_ring; 211 212 msm_ring = slab_alloc_st(&msm_submit->ring_pool); 213 214 msm_ring->u.submit = submit; 215 216 /* NOTE: needs to be before _suballoc_ring_bo() since it could 217 * increment the refcnt of the current ring 218 */ 219 msm_ring->base.refcnt = 1; 220 221 if (flags & FD_RINGBUFFER_STREAMING) { 222 msm_submit_suballoc_ring_bo(submit, msm_ring, size); 223 } else { 224 if (flags & 
FD_RINGBUFFER_GROWABLE) 225 size = INIT_SIZE; 226 227 msm_ring->offset = 0; 228 msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 229 DRM_FREEDRENO_GEM_GPUREADONLY); 230 } 231 232 if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) 233 return NULL; 234 235 if (flags & FD_RINGBUFFER_PRIMARY) { 236 debug_assert(!msm_submit->primary); 237 msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); 238 } 239 240 return &msm_ring->base; 241} 242 243static int 244msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, 245 int *out_fence_fd, uint32_t *out_fence) 246{ 247 struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); 248 struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); 249 struct drm_msm_gem_submit req = { 250 .flags = msm_pipe->pipe, 251 .queueid = msm_pipe->queue_id, 252 }; 253 int ret; 254 255 debug_assert(msm_submit->primary); 256 finalize_current_cmd(msm_submit->primary); 257 258 struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary); 259 struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds]; 260 261 for (unsigned i = 0; i < primary->u.nr_cmds; i++) { 262 cmds[i].type = MSM_SUBMIT_CMD_BUF; 263 cmds[i].submit_idx = append_bo(msm_submit, 264 primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP); 265 cmds[i].submit_offset = primary->offset; 266 cmds[i].size = primary->u.cmds[i].size; 267 cmds[i].pad = 0; 268 cmds[i].nr_relocs = 0; 269 } 270 271 if (in_fence_fd != -1) { 272 req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; 273 req.fence_fd = in_fence_fd; 274 } 275 276 if (out_fence_fd) { 277 req.flags |= MSM_SUBMIT_FENCE_FD_OUT; 278 } 279 280 /* needs to be after get_cmd() as that could create bos/cmds table: */ 281 req.bos = VOID2U64(msm_submit->submit_bos), 282 req.nr_bos = msm_submit->nr_submit_bos; 283 req.cmds = VOID2U64(cmds), 284 req.nr_cmds = primary->u.nr_cmds; 285 286 DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); 287 288 ret = drmCommandWriteRead(submit->pipe->dev->fd, 
DRM_MSM_GEM_SUBMIT, 289 &req, sizeof(req)); 290 if (ret) { 291 ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); 292 msm_dump_submit(&req); 293 } else if (!ret) { 294 if (out_fence) 295 *out_fence = req.fence; 296 297 if (out_fence_fd) 298 *out_fence_fd = req.fence_fd; 299 } 300 301 return ret; 302} 303 304static void 305msm_submit_sp_destroy(struct fd_submit *submit) 306{ 307 struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); 308 309 if (msm_submit->primary) 310 fd_ringbuffer_del(msm_submit->primary); 311 if (msm_submit->suballoc_ring) 312 fd_ringbuffer_del(msm_submit->suballoc_ring); 313 314 _mesa_hash_table_destroy(msm_submit->bo_table, NULL); 315 316 // TODO it would be nice to have a way to debug_assert() if all 317 // rb's haven't been free'd back to the slab, because that is 318 // an indication that we are leaking bo's 319 slab_destroy(&msm_submit->ring_pool); 320 321 for (unsigned i = 0; i < msm_submit->nr_bos; i++) 322 fd_bo_del(msm_submit->bos[i]); 323 324 free(msm_submit->submit_bos); 325 free(msm_submit->bos); 326 free(msm_submit); 327} 328 329static const struct fd_submit_funcs submit_funcs = { 330 .new_ringbuffer = msm_submit_sp_new_ringbuffer, 331 .flush = msm_submit_sp_flush, 332 .destroy = msm_submit_sp_destroy, 333}; 334 335struct fd_submit * 336msm_submit_sp_new(struct fd_pipe *pipe) 337{ 338 struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); 339 struct fd_submit *submit; 340 static unsigned submit_cnt = 0; 341 342 msm_submit->seqno = ++submit_cnt; 343 msm_submit->bo_table = _mesa_hash_table_create(NULL, 344 _mesa_hash_pointer, _mesa_key_pointer_equal); 345 // TODO tune size: 346 slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); 347 348 submit = &msm_submit->base; 349 submit->pipe = pipe; 350 submit->funcs = &submit_funcs; 351 352 return submit; 353} 354 355 356static void 357finalize_current_cmd(struct fd_ringbuffer *ring) 358{ 359 debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); 
360 361 struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); 362 unsigned idx = APPEND(&msm_ring->u, cmds); 363 364 msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo); 365 msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start); 366} 367 368static void 369msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) 370{ 371 struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); 372 struct fd_pipe *pipe = msm_ring->u.submit->pipe; 373 374 debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); 375 376 finalize_current_cmd(ring); 377 378 fd_bo_del(msm_ring->ring_bo); 379 msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 380 DRM_FREEDRENO_GEM_GPUREADONLY); 381 382 ring->start = fd_bo_map(msm_ring->ring_bo); 383 ring->end = &(ring->start[size/4]); 384 ring->cur = ring->start; 385 ring->size = size; 386} 387 388static void 389msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, 390 const struct fd_reloc *reloc) 391{ 392 struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); 393 struct fd_pipe *pipe; 394 395 if (ring->flags & _FD_RINGBUFFER_OBJECT) { 396 unsigned idx = APPEND(&msm_ring->u, reloc_bos); 397 398 msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); 399 msm_ring->u.reloc_bos[idx].flags = reloc->flags; 400 401 pipe = msm_ring->u.pipe; 402 } else { 403 struct msm_submit_sp *msm_submit = 404 to_msm_submit_sp(msm_ring->u.submit); 405 406 append_bo(msm_submit, reloc->bo, reloc->flags); 407 408 pipe = msm_ring->u.submit->pipe; 409 } 410 411 uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset; 412 uint32_t dword = iova; 413 int shift = reloc->shift; 414 415 if (shift < 0) 416 dword >>= -shift; 417 else 418 dword <<= shift; 419 420 (*ring->cur++) = dword | reloc->or; 421 422 if (pipe->gpu_id >= 500) { 423 dword = iova >> 32; 424 shift -= 32; 425 426 if (shift < 0) 427 dword >>= -shift; 428 else 429 dword <<= shift; 430 431 (*ring->cur++) = dword | reloc->orhi; 432 } 433} 434 435static uint32_t 
436msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, 437 struct fd_ringbuffer *target, uint32_t cmd_idx) 438{ 439 struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); 440 struct fd_bo *bo; 441 uint32_t size; 442 443 if ((target->flags & FD_RINGBUFFER_GROWABLE) && 444 (cmd_idx < msm_target->u.nr_cmds)) { 445 bo = msm_target->u.cmds[cmd_idx].ring_bo; 446 size = msm_target->u.cmds[cmd_idx].size; 447 } else { 448 bo = msm_target->ring_bo; 449 size = offset_bytes(target->cur, target->start); 450 } 451 452 msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){ 453 .bo = bo, 454 .flags = FD_RELOC_READ | FD_RELOC_DUMP, 455 .offset = msm_target->offset, 456 }); 457 458 if (!(target->flags & _FD_RINGBUFFER_OBJECT)) 459 return size; 460 461 struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); 462 463 if (ring->flags & _FD_RINGBUFFER_OBJECT) { 464 for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { 465 unsigned idx = APPEND(&msm_ring->u, reloc_bos); 466 467 msm_ring->u.reloc_bos[idx].bo = 468 fd_bo_ref(msm_target->u.reloc_bos[i].bo); 469 msm_ring->u.reloc_bos[idx].flags = 470 msm_target->u.reloc_bos[i].flags; 471 } 472 } else { 473 // TODO it would be nice to know whether we have already 474 // seen this target before. 
But hopefully we hit the 475 // append_bo() fast path enough for this to not matter: 476 struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); 477 478 for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { 479 append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, 480 msm_target->u.reloc_bos[i].flags); 481 } 482 } 483 484 return size; 485} 486 487static uint32_t 488msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) 489{ 490 if (ring->flags & FD_RINGBUFFER_GROWABLE) 491 return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; 492 return 1; 493} 494 495static void 496msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) 497{ 498 struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); 499 500 fd_bo_del(msm_ring->ring_bo); 501 502 if (ring->flags & _FD_RINGBUFFER_OBJECT) { 503 for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { 504 fd_bo_del(msm_ring->u.reloc_bos[i].bo); 505 } 506 507 free(msm_ring); 508 } else { 509 struct fd_submit *submit = msm_ring->u.submit; 510 511 for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { 512 fd_bo_del(msm_ring->u.cmds[i].ring_bo); 513 } 514 515 slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring); 516 } 517} 518 519static const struct fd_ringbuffer_funcs ring_funcs = { 520 .grow = msm_ringbuffer_sp_grow, 521 .emit_reloc = msm_ringbuffer_sp_emit_reloc, 522 .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring, 523 .cmd_count = msm_ringbuffer_sp_cmd_count, 524 .destroy = msm_ringbuffer_sp_destroy, 525}; 526 527static inline struct fd_ringbuffer * 528msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, 529 enum fd_ringbuffer_flags flags) 530{ 531 struct fd_ringbuffer *ring = &msm_ring->base; 532 533 debug_assert(msm_ring->ring_bo); 534 535 uint8_t *base = fd_bo_map(msm_ring->ring_bo); 536 ring->start = (void *)(base + msm_ring->offset); 537 ring->end = &(ring->start[size/4]); 538 ring->cur = ring->start; 539 540 ring->size = size; 541 ring->flags = flags; 542 543 ring->funcs 
= &ring_funcs; 544 545 // TODO initializing these could probably be conditional on flags 546 // since unneed for FD_RINGBUFFER_STAGING case.. 547 msm_ring->u.cmds = NULL; 548 msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; 549 550 msm_ring->u.reloc_bos = NULL; 551 msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; 552 553 return ring; 554} 555 556struct fd_ringbuffer * 557msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) 558{ 559 struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); 560 561 msm_ring->u.pipe = pipe; 562 msm_ring->offset = 0; 563 msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 564 DRM_FREEDRENO_GEM_GPUREADONLY); 565 msm_ring->base.refcnt = 1; 566 567 return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); 568} 569