17e102996Smaya/* 27e102996Smaya * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org> 37e102996Smaya * 47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a 57e102996Smaya * copy of this software and associated documentation files (the "Software"), 67e102996Smaya * to deal in the Software without restriction, including without limitation 77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the 97e102996Smaya * Software is furnished to do so, subject to the following conditions: 107e102996Smaya * 117e102996Smaya * The above copyright notice and this permission notice (including the next 127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the 137e102996Smaya * Software. 147e102996Smaya * 157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217e102996Smaya * SOFTWARE. 227e102996Smaya * 237e102996Smaya * Authors: 247e102996Smaya * Rob Clark <robclark@freedesktop.org> 257e102996Smaya */ 267e102996Smaya 277e102996Smaya#ifndef FREEDRENO_RINGBUFFER_H_ 287e102996Smaya#define FREEDRENO_RINGBUFFER_H_ 297e102996Smaya 307ec681f3Smrg#include <stdio.h> 317ec681f3Smrg#include "util/u_atomic.h" 327e102996Smaya#include "util/u_debug.h" 337ec681f3Smrg#include "util/u_queue.h" 347e102996Smaya 357ec681f3Smrg#include "adreno_common.xml.h" 367ec681f3Smrg#include "adreno_pm4.xml.h" 377e102996Smaya#include "freedreno_drmif.h" 387ec681f3Smrg#include "freedreno_pm4.h" 397ec681f3Smrg 407ec681f3Smrg#ifdef __cplusplus 417ec681f3Smrgextern "C" { 427ec681f3Smrg#endif 437e102996Smaya 447e102996Smayastruct fd_submit; 457e102996Smayastruct fd_ringbuffer; 467e102996Smaya 477e102996Smayaenum fd_ringbuffer_flags { 487e102996Smaya 497ec681f3Smrg /* Primary ringbuffer for a submit, ie. an IB1 level rb 507ec681f3Smrg * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH 517ec681f3Smrg * packets. 527ec681f3Smrg */ 537ec681f3Smrg FD_RINGBUFFER_PRIMARY = 0x1, 547ec681f3Smrg 557ec681f3Smrg /* Hint that the stateobj will be used for streaming state 567ec681f3Smrg * that is used once or a few times and then discarded. 577ec681f3Smrg * 587ec681f3Smrg * For sub-allocation, non streaming stateobj's should be 597ec681f3Smrg * sub-allocated from a page size buffer, so one long lived 607ec681f3Smrg * state obj doesn't prevent other pages from being freed. 617ec681f3Smrg * (Ie. it would be no worse than allocating a page sized 627ec681f3Smrg * bo for each small non-streaming stateobj). 637ec681f3Smrg * 647ec681f3Smrg * But streaming stateobj's could be sub-allocated from a 657ec681f3Smrg * larger buffer to reduce the alloc/del overhead. 667ec681f3Smrg */ 677ec681f3Smrg FD_RINGBUFFER_STREAMING = 0x2, 687ec681f3Smrg 697ec681f3Smrg /* Indicates that "growable" cmdstream can be used, 707ec681f3Smrg * consisting of multiple physical cmdstream buffers 717ec681f3Smrg */ 727ec681f3Smrg FD_RINGBUFFER_GROWABLE = 0x4, 737ec681f3Smrg 747ec681f3Smrg /* Internal use only: */ 757ec681f3Smrg _FD_RINGBUFFER_OBJECT = 0x8, 767e102996Smaya}; 777e102996Smaya 787e102996Smaya/* A submit object manages/tracks all the state buildup for a "submit" 797e102996Smaya * ioctl to the kernel. Additionally, with the exception of long-lived 807e102996Smaya * non-STREAMING stateobj rb's, rb's are allocated from the submit. 817e102996Smaya */ 827ec681f3Smrgstruct fd_submit *fd_submit_new(struct fd_pipe *pipe); 837e102996Smaya 847e102996Smaya/* NOTE: all ringbuffer's create from the submit should be unref'd 857e102996Smaya * before destroying the submit. 867e102996Smaya */ 877e102996Smayavoid fd_submit_del(struct fd_submit *submit); 887e102996Smaya 897ec681f3Smrgstruct fd_submit * fd_submit_ref(struct fd_submit *submit); 907ec681f3Smrg 917e102996Smaya/* Allocate a new rb from the submit. */ 927ec681f3Smrgstruct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit, 937ec681f3Smrg uint32_t size, 947ec681f3Smrg enum fd_ringbuffer_flags flags); 957ec681f3Smrg 967ec681f3Smrg/** 977ec681f3Smrg * Encapsulates submit out-fence(s), which consist of a 'timestamp' (per- 987ec681f3Smrg * pipe (submitqueue) sequence number) and optionally, if requested, an 997ec681f3Smrg * out-fence-fd 1007ec681f3Smrg */ 1017ec681f3Smrgstruct fd_submit_fence { 1027ec681f3Smrg /** 1037ec681f3Smrg * The ready fence is signaled once the submit is actually flushed down 1047ec681f3Smrg * to the kernel, and fence/fence_fd are populated. You must wait for 1057ec681f3Smrg * this fence to be signaled before reading fence/fence_fd. 1067ec681f3Smrg */ 1077ec681f3Smrg struct util_queue_fence ready; 1087ec681f3Smrg 1097ec681f3Smrg struct fd_fence fence; 1107ec681f3Smrg 1117ec681f3Smrg /** 1127ec681f3Smrg * Optional dma_fence fd, returned by submit if use_fence_fd is true 1137ec681f3Smrg */ 1147ec681f3Smrg int fence_fd; 1157ec681f3Smrg bool use_fence_fd; 1167ec681f3Smrg}; 1177e102996Smaya 1187e102996Smaya/* in_fence_fd: -1 for no in-fence, else fence fd 1197ec681f3Smrg * out_fence can be NULL if no output fence is required 1207e102996Smaya */ 1217ec681f3Smrgint fd_submit_flush(struct fd_submit *submit, int in_fence_fd, 1227ec681f3Smrg struct fd_submit_fence *out_fence); 1237e102996Smaya 1247ec681f3Smrgstruct fd_ringbuffer; 1257ec681f3Smrgstruct fd_reloc; 1267ec681f3Smrg 1277ec681f3Smrgstruct fd_ringbuffer_funcs { 1287ec681f3Smrg void (*grow)(struct fd_ringbuffer *ring, uint32_t size); 1297ec681f3Smrg void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc); 1307ec681f3Smrg uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring, 1317ec681f3Smrg struct fd_ringbuffer *target, uint32_t cmd_idx); 1327ec681f3Smrg uint32_t (*cmd_count)(struct fd_ringbuffer *ring); 1337ec681f3Smrg bool (*check_size)(struct fd_ringbuffer *ring); 1347ec681f3Smrg void (*destroy)(struct fd_ringbuffer *ring); 1357ec681f3Smrg}; 1367e102996Smaya 1377e102996Smaya/* the ringbuffer object is not opaque so that OUT_RING() type stuff 1387e102996Smaya * can be inlined. Note that users should not make assumptions about 1397e102996Smaya * the size of this struct. 1407e102996Smaya */ 1417e102996Smayastruct fd_ringbuffer { 1427ec681f3Smrg uint32_t *cur, *end, *start; 1437ec681f3Smrg const struct fd_ringbuffer_funcs *funcs; 1447e102996Smaya 1457ec681f3Smrg // size or end coudl probably go away 1467ec681f3Smrg int size; 1477ec681f3Smrg int32_t refcnt; 1487ec681f3Smrg enum fd_ringbuffer_flags flags; 1497e102996Smaya}; 1507e102996Smaya 1517e102996Smaya/* Allocate a new long-lived state object, not associated with 1527e102996Smaya * a submit: 1537e102996Smaya */ 1547ec681f3Smrgstruct fd_ringbuffer *fd_ringbuffer_new_object(struct fd_pipe *pipe, 1557ec681f3Smrg uint32_t size); 1567ec681f3Smrg 1577ec681f3Smrgstatic inline void 1587ec681f3Smrgfd_ringbuffer_del(struct fd_ringbuffer *ring) 1597ec681f3Smrg{ 1607ec681f3Smrg if (!p_atomic_dec_zero(&ring->refcnt)) 1617ec681f3Smrg return; 1627ec681f3Smrg 1637ec681f3Smrg ring->funcs->destroy(ring); 1647ec681f3Smrg} 1657ec681f3Smrg 1667ec681f3Smrgstatic inline struct fd_ringbuffer * 1677ec681f3Smrgfd_ringbuffer_ref(struct fd_ringbuffer *ring) 1687ec681f3Smrg{ 1697ec681f3Smrg p_atomic_inc(&ring->refcnt); 1707ec681f3Smrg return ring; 1717ec681f3Smrg} 1727ec681f3Smrg 1737ec681f3Smrgstatic inline void 1747ec681f3Smrgfd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords) 1757ec681f3Smrg{ 1767ec681f3Smrg assert(ring->funcs->grow); /* unsupported on kgsl */ 1777ec681f3Smrg 1787ec681f3Smrg /* there is an upper bound on IB size, which appears to be 0x0fffff */ 1797ec681f3Smrg ring->size = MIN2(ring->size << 1, 0x0fffff); 1807e102996Smaya 1817ec681f3Smrg ring->funcs->grow(ring, ring->size); 1827ec681f3Smrg} 1837e102996Smaya 1847ec681f3Smrgstatic inline bool 1857ec681f3Smrgfd_ringbuffer_check_size(struct fd_ringbuffer *ring) 1867ec681f3Smrg{ 1877ec681f3Smrg return ring->funcs->check_size(ring); 1887ec681f3Smrg} 1897e102996Smaya 1907ec681f3Smrgstatic inline void 1917ec681f3Smrgfd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data) 1927e102996Smaya{ 1937ec681f3Smrg (*ring->cur++) = data; 1947e102996Smaya} 1957e102996Smaya 1967e102996Smayastruct fd_reloc { 1977ec681f3Smrg struct fd_bo *bo; 1987ec681f3Smrg uint64_t iova; 1997ec681f3Smrg#define FD_RELOC_READ 0x0001 2007ec681f3Smrg#define FD_RELOC_WRITE 0x0002 2017ec681f3Smrg#define FD_RELOC_DUMP 0x0004 2027ec681f3Smrg uint32_t offset; 2037ec681f3Smrg uint32_t orlo; 2047ec681f3Smrg int32_t shift; 2057ec681f3Smrg uint32_t orhi; /* used for a5xx+ */ 2067e102996Smaya}; 2077e102996Smaya 2087ec681f3Smrg/* We always mark BOs for write, instead of tracking it across reloc 2097ec681f3Smrg * sources in userspace. On the kernel side, this means we track a single 2107ec681f3Smrg * excl fence in the BO instead of a set of read fences, which is cheaper. 2117ec681f3Smrg * The downside is that a dmabuf-shared device won't be able to read in 2127ec681f3Smrg * parallel with a read-only access by freedreno, but most other drivers 2137ec681f3Smrg * have decided that that usecase isn't important enough to do this 2147ec681f3Smrg * tracking, as well. 2157ec681f3Smrg */ 2167ec681f3Smrg#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE) 2177ec681f3Smrg 2187e102996Smaya/* NOTE: relocs are 2 dwords on a5xx+ */ 2197e102996Smaya 2207ec681f3Smrgstatic inline void 2217ec681f3Smrgfd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc) 2227ec681f3Smrg{ 2237ec681f3Smrg ring->funcs->emit_reloc(ring, reloc); 2247ec681f3Smrg} 2257ec681f3Smrg 2267ec681f3Smrgstatic inline uint32_t 2277ec681f3Smrgfd_ringbuffer_cmd_count(struct fd_ringbuffer *ring) 2287ec681f3Smrg{ 2297ec681f3Smrg if (!ring->funcs->cmd_count) 2307ec681f3Smrg return 1; 2317ec681f3Smrg return ring->funcs->cmd_count(ring); 2327ec681f3Smrg} 2337ec681f3Smrg 2347ec681f3Smrgstatic inline uint32_t 2357ec681f3Smrgfd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring, 2367ec681f3Smrg struct fd_ringbuffer *target, 2377ec681f3Smrg uint32_t cmd_idx) 2387ec681f3Smrg{ 2397ec681f3Smrg return ring->funcs->emit_reloc_ring(ring, target, cmd_idx); 2407ec681f3Smrg} 2417e102996Smaya 2427e102996Smayastatic inline uint32_t 2437e102996Smayaoffset_bytes(void *end, void *start) 2447e102996Smaya{ 2457ec681f3Smrg return ((char *)end) - ((char *)start); 2467e102996Smaya} 2477e102996Smaya 2487e102996Smayastatic inline uint32_t 2497e102996Smayafd_ringbuffer_size(struct fd_ringbuffer *ring) 2507e102996Smaya{ 2517ec681f3Smrg /* only really needed for stateobj ringbuffers, and won't really 2527ec681f3Smrg * do what you expect for growable rb's.. so lets just restrict 2537ec681f3Smrg * this to stateobj's for now: 2547ec681f3Smrg */ 2557ec681f3Smrg debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE)); 2567ec681f3Smrg return offset_bytes(ring->cur, ring->start); 2577ec681f3Smrg} 2587ec681f3Smrg 2597ec681f3Smrgstatic inline bool 2607ec681f3Smrgfd_ringbuffer_empty(struct fd_ringbuffer *ring) 2617ec681f3Smrg{ 2627ec681f3Smrg return (fd_ringbuffer_cmd_count(ring) == 1) && 2637ec681f3Smrg (offset_bytes(ring->cur, ring->start) == 0); 2647ec681f3Smrg} 2657ec681f3Smrg 2667ec681f3Smrg#define LOG_DWORDS 0 2677ec681f3Smrg 2687ec681f3Smrgstatic inline void 2697ec681f3SmrgOUT_RING(struct fd_ringbuffer *ring, uint32_t data) 2707ec681f3Smrg{ 2717ec681f3Smrg if (LOG_DWORDS) { 2727ec681f3Smrg fprintf(stderr, "ring[%p]: OUT_RING %04x: %08x", ring, 2737ec681f3Smrg (uint32_t)(ring->cur - ring->start), data); 2747ec681f3Smrg } 2757ec681f3Smrg fd_ringbuffer_emit(ring, data); 2767ec681f3Smrg} 2777ec681f3Smrg 2787ec681f3Smrg/* 2797ec681f3Smrg * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+ 2807ec681f3Smrg */ 2817ec681f3Smrg#ifndef __cplusplus 2827ec681f3Smrgstatic inline void 2837ec681f3SmrgOUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset, 2847ec681f3Smrg uint64_t or, int32_t shift) 2857ec681f3Smrg{ 2867ec681f3Smrg if (LOG_DWORDS) { 2877ec681f3Smrg fprintf(stderr, "ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, 2887ec681f3Smrg (uint32_t)(ring->cur - ring->start), bo, offset, shift); 2897ec681f3Smrg } 2907ec681f3Smrg debug_assert(offset < fd_bo_size(bo)); 2917ec681f3Smrg 2927ec681f3Smrg uint64_t iova = fd_bo_get_iova(bo) + offset; 2937ec681f3Smrg 2947ec681f3Smrg if (shift < 0) 2957ec681f3Smrg iova >>= -shift; 2967ec681f3Smrg else 2977ec681f3Smrg iova <<= shift; 2987ec681f3Smrg 2997ec681f3Smrg iova |= or ; 3007ec681f3Smrg 3017ec681f3Smrg fd_ringbuffer_reloc(ring, &(struct fd_reloc){ 3027ec681f3Smrg .bo = bo, 3037ec681f3Smrg .iova = iova, 3047ec681f3Smrg .offset = offset, 3057ec681f3Smrg .orlo = or 3067ec681f3Smrg , 3077ec681f3Smrg .shift = shift, 3087ec681f3Smrg .orhi = or >> 32, 3097ec681f3Smrg }); 3107ec681f3Smrg} 3117ec681f3Smrg#endif 3127ec681f3Smrg 3137ec681f3Smrgstatic inline void 3147ec681f3SmrgOUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) 3157ec681f3Smrg{ 3167ec681f3Smrg fd_ringbuffer_emit_reloc_ring_full(ring, target, 0); 3177ec681f3Smrg} 3187ec681f3Smrg 3197ec681f3Smrgstatic inline void 3207ec681f3SmrgBEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) 3217ec681f3Smrg{ 3227ec681f3Smrg if (unlikely(ring->cur + ndwords > ring->end)) 3237ec681f3Smrg fd_ringbuffer_grow(ring, ndwords); 3247ec681f3Smrg} 3257ec681f3Smrg 3267ec681f3Smrgstatic inline void 3277ec681f3SmrgOUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) 3287ec681f3Smrg{ 3297ec681f3Smrg BEGIN_RING(ring, cnt + 1); 3307ec681f3Smrg OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt)); 3317ec681f3Smrg} 3327ec681f3Smrg 3337ec681f3Smrgstatic inline void 3347ec681f3SmrgOUT_PKT2(struct fd_ringbuffer *ring) 3357ec681f3Smrg{ 3367ec681f3Smrg BEGIN_RING(ring, 1); 3377ec681f3Smrg OUT_RING(ring, CP_TYPE2_PKT); 3387ec681f3Smrg} 3397ec681f3Smrg 3407ec681f3Smrgstatic inline void 3417ec681f3SmrgOUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) 3427ec681f3Smrg{ 3437ec681f3Smrg BEGIN_RING(ring, cnt + 1); 3447ec681f3Smrg OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8)); 3457ec681f3Smrg} 3467ec681f3Smrg 3477ec681f3Smrg/* 3487ec681f3Smrg * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 3497ec681f3Smrg */ 3507ec681f3Smrg 3517ec681f3Smrgstatic inline void 3527ec681f3SmrgOUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) 3537ec681f3Smrg{ 3547ec681f3Smrg BEGIN_RING(ring, cnt + 1); 3557ec681f3Smrg OUT_RING(ring, pm4_pkt4_hdr(regindx, cnt)); 3567ec681f3Smrg} 3577ec681f3Smrg 3587ec681f3Smrgstatic inline void 3597ec681f3SmrgOUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) 3607ec681f3Smrg{ 3617ec681f3Smrg BEGIN_RING(ring, cnt + 1); 3627ec681f3Smrg OUT_RING(ring, pm4_pkt7_hdr(opcode, cnt)); 3637ec681f3Smrg} 3647ec681f3Smrg 3657ec681f3Smrgstatic inline void 3667ec681f3SmrgOUT_WFI(struct fd_ringbuffer *ring) 3677ec681f3Smrg{ 3687ec681f3Smrg OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); 3697ec681f3Smrg OUT_RING(ring, 0x00000000); 3707ec681f3Smrg} 3717ec681f3Smrg 3727ec681f3Smrgstatic inline void 3737ec681f3SmrgOUT_WFI5(struct fd_ringbuffer *ring) 3747ec681f3Smrg{ 3757ec681f3Smrg OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); 3767e102996Smaya} 3777e102996Smaya 3787ec681f3Smrg#ifdef __cplusplus 3797ec681f3Smrg} /* end of extern "C" */ 3807ec681f3Smrg#endif 3817e102996Smaya 3827e102996Smaya#endif /* FREEDRENO_RINGBUFFER_H_ */ 383