17ec681f3Smrg/*
27ec681f3Smrg * Copyright 2021 Google LLC
37ec681f3Smrg * SPDX-License-Identifier: MIT
47ec681f3Smrg */
57ec681f3Smrg
67ec681f3Smrg#include "vn_ring.h"
77ec681f3Smrg
87ec681f3Smrg#include "vn_cs.h"
97ec681f3Smrg#include "vn_renderer.h"
107ec681f3Smrg
/* Bit flags for the ring's shared status word (read via
 * vn_ring_load_status).
 */
enum vn_ring_status_flag {
   /* NOTE(review): presumably set by the renderer when it has stopped
    * processing the ring and needs an explicit notification for new
    * submissions (see the notify decision in vn_ring_submit) — confirm
    * against the renderer side
    */
   VN_RING_STATUS_IDLE = 1u << 0,
};
147ec681f3Smrg
157ec681f3Smrgstatic uint32_t
167ec681f3Smrgvn_ring_load_head(const struct vn_ring *ring)
177ec681f3Smrg{
187ec681f3Smrg   /* the renderer is expected to store the head with memory_order_release,
197ec681f3Smrg    * forming a release-acquire ordering
207ec681f3Smrg    */
217ec681f3Smrg   return atomic_load_explicit(ring->shared.head, memory_order_acquire);
227ec681f3Smrg}
237ec681f3Smrg
247ec681f3Smrgstatic void
257ec681f3Smrgvn_ring_store_tail(struct vn_ring *ring)
267ec681f3Smrg{
277ec681f3Smrg   /* the renderer is expected to load the tail with memory_order_acquire,
287ec681f3Smrg    * forming a release-acquire ordering
297ec681f3Smrg    */
307ec681f3Smrg   return atomic_store_explicit(ring->shared.tail, ring->cur,
317ec681f3Smrg                                memory_order_release);
327ec681f3Smrg}
337ec681f3Smrg
/* Load the shared status word (a mask of vn_ring_status_flag bits). */
static uint32_t
vn_ring_load_status(const struct vn_ring *ring)
{
   /* this must be called and ordered after vn_ring_store_tail; the seq_cst
    * load keeps the status read from being reordered before the tail store
    */
   return atomic_load_explicit(ring->shared.status, memory_order_seq_cst);
}
407ec681f3Smrg
417ec681f3Smrgstatic void
427ec681f3Smrgvn_ring_write_buffer(struct vn_ring *ring, const void *data, uint32_t size)
437ec681f3Smrg{
447ec681f3Smrg   assert(ring->cur + size - vn_ring_load_head(ring) <= ring->buffer_size);
457ec681f3Smrg
467ec681f3Smrg   const uint32_t offset = ring->cur & ring->buffer_mask;
477ec681f3Smrg   if (offset + size <= ring->buffer_size) {
487ec681f3Smrg      memcpy(ring->shared.buffer + offset, data, size);
497ec681f3Smrg   } else {
507ec681f3Smrg      const uint32_t s = ring->buffer_size - offset;
517ec681f3Smrg      memcpy(ring->shared.buffer + offset, data, s);
527ec681f3Smrg      memcpy(ring->shared.buffer, data + s, size - s);
537ec681f3Smrg   }
547ec681f3Smrg
557ec681f3Smrg   ring->cur += size;
567ec681f3Smrg}
577ec681f3Smrg
/* Return whether seqno a is "greater than or equal to" seqno b under
 * wraparound.  Seqnos are raw 32-bit ring offsets, and ring->cur (the most
 * recently emitted offset) anchors which side of the wrap a and b are on.
 */
static bool
vn_ring_ge_seqno(const struct vn_ring *ring, uint32_t a, uint32_t b)
{
   /* this can return false negative when not called fast enough (e.g., when
    * called once every couple hours), but following calls with larger a's
    * will correct itself
    *
    * TODO use real seqnos?
    */
   if (a >= b)
      return ring->cur >= a || ring->cur < b;
   else
      return ring->cur >= a && ring->cur < b;
}
727ec681f3Smrg
/* Retire every pending submit whose seqno has been reached by @seqno:
 * drop its shmem references and move it to the free pool.  The pending
 * list is in submission order, so scanning stops at the first unfinished
 * submit.
 */
static void
vn_ring_retire_submits(struct vn_ring *ring, uint32_t seqno)
{
   list_for_each_entry_safe(struct vn_ring_submit, submit, &ring->submits,
                            head) {
      /* submits after this one have even larger seqnos */
      if (!vn_ring_ge_seqno(ring, seqno, submit->seqno))
         break;

      /* the renderer is done with these shmems */
      for (uint32_t i = 0; i < submit->shmem_count; i++)
         vn_renderer_shmem_unref(ring->renderer, submit->shmems[i]);

      /* recycle for vn_ring_get_submit */
      list_del(&submit->head);
      list_add(&submit->head, &ring->free_submits);
   }
}
887ec681f3Smrg
/* Busy-wait until the renderer head reaches @seqno; returns the head. */
static uint32_t
vn_ring_wait_seqno(const struct vn_ring *ring, uint32_t seqno)
{
   /* A renderer wait incurs several hops and the renderer might poll
    * repeatedly anyway.  Let's just poll here.
    */
   uint32_t relax_iter = 0;
   for (;;) {
      const uint32_t cur_head = vn_ring_load_head(ring);
      if (vn_ring_ge_seqno(ring, cur_head, seqno))
         return cur_head;
      vn_relax(&relax_iter, "ring seqno");
   }
}
1037ec681f3Smrg
1047ec681f3Smrgstatic uint32_t
1057ec681f3Smrgvn_ring_wait_space(const struct vn_ring *ring, uint32_t size)
1067ec681f3Smrg{
1077ec681f3Smrg   assert(size <= ring->buffer_size);
1087ec681f3Smrg
1097ec681f3Smrg   /* see the reasoning in vn_ring_wait_seqno */
1107ec681f3Smrg   uint32_t iter = 0;
1117ec681f3Smrg   do {
1127ec681f3Smrg      const uint32_t head = vn_ring_load_head(ring);
1137ec681f3Smrg      if (ring->cur + size - head <= ring->buffer_size)
1147ec681f3Smrg         return head;
1157ec681f3Smrg      vn_relax(&iter, "ring space");
1167ec681f3Smrg   } while (true);
1177ec681f3Smrg}
1187ec681f3Smrg
1197ec681f3Smrgvoid
1207ec681f3Smrgvn_ring_get_layout(size_t buf_size,
1217ec681f3Smrg                   size_t extra_size,
1227ec681f3Smrg                   struct vn_ring_layout *layout)
1237ec681f3Smrg{
1247ec681f3Smrg   /* this can be changed/extended quite freely */
1257ec681f3Smrg   struct layout {
1267ec681f3Smrg      uint32_t head __attribute__((aligned(64)));
1277ec681f3Smrg      uint32_t tail __attribute__((aligned(64)));
1287ec681f3Smrg      uint32_t status __attribute__((aligned(64)));
1297ec681f3Smrg
1307ec681f3Smrg      uint8_t buffer[] __attribute__((aligned(64)));
1317ec681f3Smrg   };
1327ec681f3Smrg
1337ec681f3Smrg   assert(buf_size && util_is_power_of_two_or_zero(buf_size));
1347ec681f3Smrg
1357ec681f3Smrg   layout->head_offset = offsetof(struct layout, head);
1367ec681f3Smrg   layout->tail_offset = offsetof(struct layout, tail);
1377ec681f3Smrg   layout->status_offset = offsetof(struct layout, status);
1387ec681f3Smrg
1397ec681f3Smrg   layout->buffer_offset = offsetof(struct layout, buffer);
1407ec681f3Smrg   layout->buffer_size = buf_size;
1417ec681f3Smrg
1427ec681f3Smrg   layout->extra_offset = layout->buffer_offset + layout->buffer_size;
1437ec681f3Smrg   layout->extra_size = extra_size;
1447ec681f3Smrg
1457ec681f3Smrg   layout->shmem_size = layout->extra_offset + layout->extra_size;
1467ec681f3Smrg}
1477ec681f3Smrg
/* Initialize @ring over the shared memory region @shared, which must be laid
 * out per @layout (see vn_ring_get_layout).  Both the ring struct and the
 * shared region are zeroed.
 */
void
vn_ring_init(struct vn_ring *ring,
             struct vn_renderer *renderer,
             const struct vn_ring_layout *layout,
             void *shared)
{
   memset(ring, 0, sizeof(*ring));
   memset(shared, 0, layout->shmem_size);

   ring->renderer = renderer;

   /* the power-of-two size lets buffer_mask map ring offsets to buffer
    * offsets with a single AND in vn_ring_write_buffer
    */
   assert(layout->buffer_size &&
          util_is_power_of_two_or_zero(layout->buffer_size));
   ring->buffer_size = layout->buffer_size;
   ring->buffer_mask = ring->buffer_size - 1;

   /* carve up the shared region; note arithmetic on void * is a GCC/clang
    * extension
    */
   ring->shared.head = shared + layout->head_offset;
   ring->shared.tail = shared + layout->tail_offset;
   ring->shared.status = shared + layout->status_offset;
   ring->shared.buffer = shared + layout->buffer_offset;
   ring->shared.extra = shared + layout->extra_offset;

   list_inithead(&ring->submits);
   list_inithead(&ring->free_submits);
}
1737ec681f3Smrg
/* Finalize @ring: retire everything up to ring->cur and free the submit
 * pool.  NOTE(review): this treats all pending submits as complete, so the
 * caller presumably must ensure the renderer is done with the ring first —
 * confirm against callers.
 */
void
vn_ring_fini(struct vn_ring *ring)
{
   vn_ring_retire_submits(ring, ring->cur);
   assert(list_is_empty(&ring->submits));

   list_for_each_entry_safe(struct vn_ring_submit, submit,
                            &ring->free_submits, head)
      free(submit);
}
1847ec681f3Smrg
1857ec681f3Smrgstruct vn_ring_submit *
1867ec681f3Smrgvn_ring_get_submit(struct vn_ring *ring, uint32_t shmem_count)
1877ec681f3Smrg{
1887ec681f3Smrg   const uint32_t min_shmem_count = 2;
1897ec681f3Smrg   struct vn_ring_submit *submit;
1907ec681f3Smrg
1917ec681f3Smrg   /* TODO this could be simplified if we could omit shmem_count */
1927ec681f3Smrg   if (shmem_count <= min_shmem_count &&
1937ec681f3Smrg       !list_is_empty(&ring->free_submits)) {
1947ec681f3Smrg      submit =
1957ec681f3Smrg         list_first_entry(&ring->free_submits, struct vn_ring_submit, head);
1967ec681f3Smrg      list_del(&submit->head);
1977ec681f3Smrg   } else {
1987ec681f3Smrg      shmem_count = MAX2(shmem_count, min_shmem_count);
1997ec681f3Smrg      submit =
2007ec681f3Smrg         malloc(sizeof(*submit) + sizeof(submit->shmems[0]) * shmem_count);
2017ec681f3Smrg   }
2027ec681f3Smrg
2037ec681f3Smrg   return submit;
2047ec681f3Smrg}
2057ec681f3Smrg
2067ec681f3Smrgbool
2077ec681f3Smrgvn_ring_submit(struct vn_ring *ring,
2087ec681f3Smrg               struct vn_ring_submit *submit,
2097ec681f3Smrg               const struct vn_cs_encoder *cs,
2107ec681f3Smrg               uint32_t *seqno)
2117ec681f3Smrg{
2127ec681f3Smrg   /* write cs to the ring */
2137ec681f3Smrg   assert(!vn_cs_encoder_is_empty(cs));
2147ec681f3Smrg   uint32_t cur_seqno;
2157ec681f3Smrg   for (uint32_t i = 0; i < cs->buffer_count; i++) {
2167ec681f3Smrg      const struct vn_cs_encoder_buffer *buf = &cs->buffers[i];
2177ec681f3Smrg      cur_seqno = vn_ring_wait_space(ring, buf->committed_size);
2187ec681f3Smrg      vn_ring_write_buffer(ring, buf->base, buf->committed_size);
2197ec681f3Smrg   }
2207ec681f3Smrg
2217ec681f3Smrg   vn_ring_store_tail(ring);
2227ec681f3Smrg   const bool notify = vn_ring_load_status(ring) & VN_RING_STATUS_IDLE;
2237ec681f3Smrg
2247ec681f3Smrg   vn_ring_retire_submits(ring, cur_seqno);
2257ec681f3Smrg
2267ec681f3Smrg   submit->seqno = ring->cur;
2277ec681f3Smrg   list_addtail(&submit->head, &ring->submits);
2287ec681f3Smrg
2297ec681f3Smrg   *seqno = submit->seqno;
2307ec681f3Smrg   return notify;
2317ec681f3Smrg}
2327ec681f3Smrg
/**
 * Wait (by polling) until the renderer has consumed the ring up to @seqno.
 *
 * This is thread-safe.
 */
void
vn_ring_wait(const struct vn_ring *ring, uint32_t seqno)
{
   vn_ring_wait_seqno(ring, seqno);
}
241