/*
 * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
267e102996Smaya
277e102996Smaya#ifndef FREEDRENO_RINGBUFFER_H_
287e102996Smaya#define FREEDRENO_RINGBUFFER_H_
297e102996Smaya
307ec681f3Smrg#include <stdio.h>
317ec681f3Smrg#include "util/u_atomic.h"
327e102996Smaya#include "util/u_debug.h"
337ec681f3Smrg#include "util/u_queue.h"
347e102996Smaya
357ec681f3Smrg#include "adreno_common.xml.h"
367ec681f3Smrg#include "adreno_pm4.xml.h"
377e102996Smaya#include "freedreno_drmif.h"
387ec681f3Smrg#include "freedreno_pm4.h"
397ec681f3Smrg
407ec681f3Smrg#ifdef __cplusplus
417ec681f3Smrgextern "C" {
427ec681f3Smrg#endif
437e102996Smaya
447e102996Smayastruct fd_submit;
457e102996Smayastruct fd_ringbuffer;
467e102996Smaya
/* Bit flags selecting how a ringbuffer is allocated and used. */
enum fd_ringbuffer_flags {

   /* Primary ringbuffer for a submit, ie. an IB1 level rb for
    * which the kernel must set up the RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 1 << 0,

   /* Hint that the stateobj holds streaming state, ie. state that
    * is used once or a few times and then discarded.
    *
    * For sub-allocation, non-streaming stateobjs should be carved
    * out of a page sized buffer, so that one long lived state obj
    * does not keep other pages from being freed.  (Ie. it is no
    * worse than allocating a page sized bo for each small
    * non-streaming stateobj.)
    *
    * Streaming stateobjs, on the other hand, could be carved out
    * of a larger buffer to reduce the alloc/del overhead.
    */
   FD_RINGBUFFER_STREAMING = 1 << 1,

   /* Indicates that "growable" cmdstream can be used, consisting
    * of multiple physical cmdstream buffers.
    */
   FD_RINGBUFFER_GROWABLE = 1 << 2,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 1 << 3,
};
777e102996Smaya
/* Create a submit object for the given pipe.
 *
 * A submit manages/tracks all of the state that is built up for one
 * "submit" ioctl to the kernel.  In addition, apart from long-lived
 * non-STREAMING stateobj rb's, ringbuffers are allocated from the
 * submit.
 */
struct fd_submit *
fd_submit_new(struct fd_pipe *pipe);
837e102996Smaya
/* Release a submit.
 *
 * NOTE: all ringbuffers created from the submit should be unref'd
 * before the submit is destroyed.
 */
void
fd_submit_del(struct fd_submit *submit);
887e102996Smaya
/* Returns a submit pointer for the given submit.
 * NOTE(review): by analogy with fd_ringbuffer_ref() this presumably takes
 * an extra reference and returns the same pointer -- confirm against the
 * implementation.
 */
struct fd_submit *
fd_submit_ref(struct fd_submit *submit);
907ec681f3Smrg
/* Allocate a new rb from the submit, with the requested size and
 * fd_ringbuffer_flags.
 */
struct fd_ringbuffer *
fd_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                         enum fd_ringbuffer_flags flags);
957ec681f3Smrg
967ec681f3Smrg/**
977ec681f3Smrg * Encapsulates submit out-fence(s), which consist of a 'timestamp' (per-
987ec681f3Smrg * pipe (submitqueue) sequence number) and optionally, if requested, an
997ec681f3Smrg * out-fence-fd
1007ec681f3Smrg */
1017ec681f3Smrgstruct fd_submit_fence {
1027ec681f3Smrg   /**
1037ec681f3Smrg    * The ready fence is signaled once the submit is actually flushed down
1047ec681f3Smrg    * to the kernel, and fence/fence_fd are populated.  You must wait for
1057ec681f3Smrg    * this fence to be signaled before reading fence/fence_fd.
1067ec681f3Smrg    */
1077ec681f3Smrg   struct util_queue_fence ready;
1087ec681f3Smrg
1097ec681f3Smrg   struct fd_fence fence;
1107ec681f3Smrg
1117ec681f3Smrg   /**
1127ec681f3Smrg    * Optional dma_fence fd, returned by submit if use_fence_fd is true
1137ec681f3Smrg    */
1147ec681f3Smrg   int fence_fd;
1157ec681f3Smrg   bool use_fence_fd;
1167ec681f3Smrg};
1177e102996Smaya
/* Flush the submit.
 *
 * in_fence_fd: fence fd to use as in-fence, or -1 for no in-fence
 * out_fence:   receives the out-fence; may be NULL if no output fence
 *              is required
 *
 * NOTE(review): the meaning of the int return value is not visible from
 * this header -- confirm against the implementation.
 */
int
fd_submit_flush(struct fd_submit *submit, int in_fence_fd,
                struct fd_submit_fence *out_fence);
1237e102996Smaya
struct fd_ringbuffer;
struct fd_reloc;

/* Backend hooks behind the inline fd_ringbuffer_*() helpers. */
struct fd_ringbuffer_funcs {
   /* Invoked by fd_ringbuffer_grow() with the new ring->size.  May be
    * absent (asserted on use; unsupported on kgsl).
    */
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);

   /* Invoked by fd_ringbuffer_reloc(). */
   void (*emit_reloc)(struct fd_ringbuffer *ring,
                      const struct fd_reloc *reloc);

   /* Invoked by fd_ringbuffer_emit_reloc_ring_full(). */
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target,
                               uint32_t cmd_idx);

   /* Optional: when NULL, fd_ringbuffer_cmd_count() reports 1. */
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);

   /* Invoked by fd_ringbuffer_check_size(). */
   bool (*check_size)(struct fd_ringbuffer *ring);

   /* Invoked by fd_ringbuffer_del() when the last reference is dropped. */
   void (*destroy)(struct fd_ringbuffer *ring);
};
1367e102996Smaya
1377e102996Smaya/* the ringbuffer object is not opaque so that OUT_RING() type stuff
1387e102996Smaya * can be inlined.  Note that users should not make assumptions about
1397e102996Smaya * the size of this struct.
1407e102996Smaya */
1417e102996Smayastruct fd_ringbuffer {
1427ec681f3Smrg   uint32_t *cur, *end, *start;
1437ec681f3Smrg   const struct fd_ringbuffer_funcs *funcs;
1447e102996Smaya
1457ec681f3Smrg   // size or end coudl probably go away
1467ec681f3Smrg   int size;
1477ec681f3Smrg   int32_t refcnt;
1487ec681f3Smrg   enum fd_ringbuffer_flags flags;
1497e102996Smaya};
1507e102996Smaya
/* Allocate a new long-lived state object of the requested size.  Unlike
 * ringbuffers obtained from fd_submit_new_ringbuffer(), it is not
 * associated with a submit.
 */
struct fd_ringbuffer *
fd_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size);
1567ec681f3Smrg
1577ec681f3Smrgstatic inline void
1587ec681f3Smrgfd_ringbuffer_del(struct fd_ringbuffer *ring)
1597ec681f3Smrg{
1607ec681f3Smrg   if (!p_atomic_dec_zero(&ring->refcnt))
1617ec681f3Smrg      return;
1627ec681f3Smrg
1637ec681f3Smrg   ring->funcs->destroy(ring);
1647ec681f3Smrg}
1657ec681f3Smrg
1667ec681f3Smrgstatic inline struct fd_ringbuffer *
1677ec681f3Smrgfd_ringbuffer_ref(struct fd_ringbuffer *ring)
1687ec681f3Smrg{
1697ec681f3Smrg   p_atomic_inc(&ring->refcnt);
1707ec681f3Smrg   return ring;
1717ec681f3Smrg}
1727ec681f3Smrg
1737ec681f3Smrgstatic inline void
1747ec681f3Smrgfd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
1757ec681f3Smrg{
1767ec681f3Smrg   assert(ring->funcs->grow); /* unsupported on kgsl */
1777ec681f3Smrg
1787ec681f3Smrg   /* there is an upper bound on IB size, which appears to be 0x0fffff */
1797ec681f3Smrg   ring->size = MIN2(ring->size << 1, 0x0fffff);
1807e102996Smaya
1817ec681f3Smrg   ring->funcs->grow(ring, ring->size);
1827ec681f3Smrg}
1837e102996Smaya
1847ec681f3Smrgstatic inline bool
1857ec681f3Smrgfd_ringbuffer_check_size(struct fd_ringbuffer *ring)
1867ec681f3Smrg{
1877ec681f3Smrg   return ring->funcs->check_size(ring);
1887ec681f3Smrg}
1897e102996Smaya
1907ec681f3Smrgstatic inline void
1917ec681f3Smrgfd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
1927e102996Smaya{
1937ec681f3Smrg   (*ring->cur++) = data;
1947e102996Smaya}
1957e102996Smaya
/* Reloc flag bits: */
#define FD_RELOC_READ  0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP  0x0004

/* Description of one buffer reference handed to the emit_reloc hook. */
struct fd_reloc {
   struct fd_bo *bo;
   uint64_t iova;
   uint32_t offset;
   uint32_t orlo;
   int32_t shift;
   uint32_t orhi; /* used for a5xx+ */
};

/* BOs are always marked for write, rather than tracking read vs. write
 * across reloc sources in userspace.  On the kernel side this means a
 * single excl fence is tracked in the BO instead of a set of read fences,
 * which is cheaper.  The downside is that a dmabuf-shared device cannot
 * read in parallel with a read-only access by freedreno, but most other
 * drivers have likewise decided that this usecase is not important enough
 * to warrant the tracking.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)
2177ec681f3Smrg
2187e102996Smaya/* NOTE: relocs are 2 dwords on a5xx+ */
2197e102996Smaya
2207ec681f3Smrgstatic inline void
2217ec681f3Smrgfd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
2227ec681f3Smrg{
2237ec681f3Smrg   ring->funcs->emit_reloc(ring, reloc);
2247ec681f3Smrg}
2257ec681f3Smrg
2267ec681f3Smrgstatic inline uint32_t
2277ec681f3Smrgfd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
2287ec681f3Smrg{
2297ec681f3Smrg   if (!ring->funcs->cmd_count)
2307ec681f3Smrg      return 1;
2317ec681f3Smrg   return ring->funcs->cmd_count(ring);
2327ec681f3Smrg}
2337ec681f3Smrg
2347ec681f3Smrgstatic inline uint32_t
2357ec681f3Smrgfd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
2367ec681f3Smrg                                   struct fd_ringbuffer *target,
2377ec681f3Smrg                                   uint32_t cmd_idx)
2387ec681f3Smrg{
2397ec681f3Smrg   return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
2407ec681f3Smrg}
2417e102996Smaya
/* Distance in bytes from start to end.
 *
 * Neither pointer is written through, so both are const-qualified; this
 * lets callers pass pointers to const data without a cast.  The pointer
 * difference is narrowed to 32 bits explicitly.
 */
static inline uint32_t
offset_bytes(const void *end, const void *start)
{
   return (uint32_t)((const char *)end - (const char *)start);
}
2477e102996Smaya
2487e102996Smayastatic inline uint32_t
2497e102996Smayafd_ringbuffer_size(struct fd_ringbuffer *ring)
2507e102996Smaya{
2517ec681f3Smrg   /* only really needed for stateobj ringbuffers, and won't really
2527ec681f3Smrg    * do what you expect for growable rb's.. so lets just restrict
2537ec681f3Smrg    * this to stateobj's for now:
2547ec681f3Smrg    */
2557ec681f3Smrg   debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
2567ec681f3Smrg   return offset_bytes(ring->cur, ring->start);
2577ec681f3Smrg}
2587ec681f3Smrg
2597ec681f3Smrgstatic inline bool
2607ec681f3Smrgfd_ringbuffer_empty(struct fd_ringbuffer *ring)
2617ec681f3Smrg{
2627ec681f3Smrg   return (fd_ringbuffer_cmd_count(ring) == 1) &&
2637ec681f3Smrg          (offset_bytes(ring->cur, ring->start) == 0);
2647ec681f3Smrg}
2657ec681f3Smrg
2667ec681f3Smrg#define LOG_DWORDS 0
2677ec681f3Smrg
2687ec681f3Smrgstatic inline void
2697ec681f3SmrgOUT_RING(struct fd_ringbuffer *ring, uint32_t data)
2707ec681f3Smrg{
2717ec681f3Smrg   if (LOG_DWORDS) {
2727ec681f3Smrg      fprintf(stderr, "ring[%p]: OUT_RING   %04x:  %08x", ring,
2737ec681f3Smrg              (uint32_t)(ring->cur - ring->start), data);
2747ec681f3Smrg   }
2757ec681f3Smrg   fd_ringbuffer_emit(ring, data);
2767ec681f3Smrg}
2777ec681f3Smrg
2787ec681f3Smrg/*
2797ec681f3Smrg * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
2807ec681f3Smrg */
2817ec681f3Smrg#ifndef __cplusplus
2827ec681f3Smrgstatic inline void
2837ec681f3SmrgOUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
2847ec681f3Smrg          uint64_t or, int32_t shift)
2857ec681f3Smrg{
2867ec681f3Smrg   if (LOG_DWORDS) {
2877ec681f3Smrg      fprintf(stderr, "ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
2887ec681f3Smrg              (uint32_t)(ring->cur - ring->start), bo, offset, shift);
2897ec681f3Smrg   }
2907ec681f3Smrg   debug_assert(offset < fd_bo_size(bo));
2917ec681f3Smrg
2927ec681f3Smrg   uint64_t iova = fd_bo_get_iova(bo) + offset;
2937ec681f3Smrg
2947ec681f3Smrg   if (shift < 0)
2957ec681f3Smrg      iova >>= -shift;
2967ec681f3Smrg   else
2977ec681f3Smrg      iova <<= shift;
2987ec681f3Smrg
2997ec681f3Smrg   iova |= or ;
3007ec681f3Smrg
3017ec681f3Smrg   fd_ringbuffer_reloc(ring, &(struct fd_reloc){
3027ec681f3Smrg                                .bo = bo,
3037ec681f3Smrg                                .iova = iova,
3047ec681f3Smrg                                .offset = offset,
3057ec681f3Smrg                                .orlo = or
3067ec681f3Smrg                                ,
3077ec681f3Smrg                                .shift = shift,
3087ec681f3Smrg                                .orhi = or >> 32,
3097ec681f3Smrg                             });
3107ec681f3Smrg}
3117ec681f3Smrg#endif
3127ec681f3Smrg
/* Emit a reference from ring to cmd index 0 of target.  The value
 * returned by the backend is intentionally discarded.
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   const uint32_t first_cmd = 0;

   (void)fd_ringbuffer_emit_reloc_ring_full(ring, target, first_cmd);
}
3187ec681f3Smrg
3197ec681f3Smrgstatic inline void
3207ec681f3SmrgBEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
3217ec681f3Smrg{
3227ec681f3Smrg   if (unlikely(ring->cur + ndwords > ring->end))
3237ec681f3Smrg      fd_ringbuffer_grow(ring, ndwords);
3247ec681f3Smrg}
3257ec681f3Smrg
/* Start a type-0 packet: reserve room for the header plus cnt payload
 * dwords, then emit the header built by pm4_pkt0_hdr().  The caller emits
 * the payload.
 */
static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   const uint32_t total_dwords = (uint32_t)cnt + 1;

   BEGIN_RING(ring, total_dwords);
   OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt));
}
3327ec681f3Smrg
3337ec681f3Smrgstatic inline void
3347ec681f3SmrgOUT_PKT2(struct fd_ringbuffer *ring)
3357ec681f3Smrg{
3367ec681f3Smrg   BEGIN_RING(ring, 1);
3377ec681f3Smrg   OUT_RING(ring, CP_TYPE2_PKT);
3387ec681f3Smrg}
3397ec681f3Smrg
3407ec681f3Smrgstatic inline void
3417ec681f3SmrgOUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
3427ec681f3Smrg{
3437ec681f3Smrg   BEGIN_RING(ring, cnt + 1);
3447ec681f3Smrg   OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
3457ec681f3Smrg}
3467ec681f3Smrg
3477ec681f3Smrg/*
3487ec681f3Smrg * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
3497ec681f3Smrg */
3507ec681f3Smrg
/* Start a type-4 packet: reserve room for the header plus cnt payload
 * dwords, then emit the header built by pm4_pkt4_hdr().  The caller emits
 * the payload.
 */
static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   const uint32_t total_dwords = (uint32_t)cnt + 1;

   BEGIN_RING(ring, total_dwords);
   OUT_RING(ring, pm4_pkt4_hdr(regindx, cnt));
}
3577ec681f3Smrg
/* Start a type-7 packet: reserve room for the header plus cnt payload
 * dwords, then emit the header built by pm4_pkt7_hdr().  The caller emits
 * the payload.
 */
static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
   const uint32_t total_dwords = (uint32_t)cnt + 1;

   BEGIN_RING(ring, total_dwords);
   OUT_RING(ring, pm4_pkt7_hdr(opcode, cnt));
}
3647ec681f3Smrg
3657ec681f3Smrgstatic inline void
3667ec681f3SmrgOUT_WFI(struct fd_ringbuffer *ring)
3677ec681f3Smrg{
3687ec681f3Smrg   OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
3697ec681f3Smrg   OUT_RING(ring, 0x00000000);
3707ec681f3Smrg}
3717ec681f3Smrg
3727ec681f3Smrgstatic inline void
3737ec681f3SmrgOUT_WFI5(struct fd_ringbuffer *ring)
3747ec681f3Smrg{
3757ec681f3Smrg   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
3767e102996Smaya}
3777e102996Smaya
3787ec681f3Smrg#ifdef __cplusplus
3797ec681f3Smrg} /* end of extern "C" */
3807ec681f3Smrg#endif
3817e102996Smaya
3827e102996Smaya#endif /* FREEDRENO_RINGBUFFER_H_ */
383