17ec681f3Smrg/* 27ec681f3Smrg * Copyright 2021 Google LLC 37ec681f3Smrg * SPDX-License-Identifier: MIT 47ec681f3Smrg */ 57ec681f3Smrg 67ec681f3Smrg#include "vn_ring.h" 77ec681f3Smrg 87ec681f3Smrg#include "vn_cs.h" 97ec681f3Smrg#include "vn_renderer.h" 107ec681f3Smrg 117ec681f3Smrgenum vn_ring_status_flag { 127ec681f3Smrg VN_RING_STATUS_IDLE = 1u << 0, 137ec681f3Smrg}; 147ec681f3Smrg 157ec681f3Smrgstatic uint32_t 167ec681f3Smrgvn_ring_load_head(const struct vn_ring *ring) 177ec681f3Smrg{ 187ec681f3Smrg /* the renderer is expected to store the head with memory_order_release, 197ec681f3Smrg * forming a release-acquire ordering 207ec681f3Smrg */ 217ec681f3Smrg return atomic_load_explicit(ring->shared.head, memory_order_acquire); 227ec681f3Smrg} 237ec681f3Smrg 247ec681f3Smrgstatic void 257ec681f3Smrgvn_ring_store_tail(struct vn_ring *ring) 267ec681f3Smrg{ 277ec681f3Smrg /* the renderer is expected to load the tail with memory_order_acquire, 287ec681f3Smrg * forming a release-acquire ordering 297ec681f3Smrg */ 307ec681f3Smrg return atomic_store_explicit(ring->shared.tail, ring->cur, 317ec681f3Smrg memory_order_release); 327ec681f3Smrg} 337ec681f3Smrg 347ec681f3Smrgstatic uint32_t 357ec681f3Smrgvn_ring_load_status(const struct vn_ring *ring) 367ec681f3Smrg{ 377ec681f3Smrg /* this must be called and ordered after vn_ring_store_tail */ 387ec681f3Smrg return atomic_load_explicit(ring->shared.status, memory_order_seq_cst); 397ec681f3Smrg} 407ec681f3Smrg 417ec681f3Smrgstatic void 427ec681f3Smrgvn_ring_write_buffer(struct vn_ring *ring, const void *data, uint32_t size) 437ec681f3Smrg{ 447ec681f3Smrg assert(ring->cur + size - vn_ring_load_head(ring) <= ring->buffer_size); 457ec681f3Smrg 467ec681f3Smrg const uint32_t offset = ring->cur & ring->buffer_mask; 477ec681f3Smrg if (offset + size <= ring->buffer_size) { 487ec681f3Smrg memcpy(ring->shared.buffer + offset, data, size); 497ec681f3Smrg } else { 507ec681f3Smrg const uint32_t s = ring->buffer_size - offset; 517ec681f3Smrg memcpy(ring->shared.buffer + offset, data, s); 527ec681f3Smrg memcpy(ring->shared.buffer, data + s, size - s); 537ec681f3Smrg } 547ec681f3Smrg 557ec681f3Smrg ring->cur += size; 567ec681f3Smrg} 577ec681f3Smrg 587ec681f3Smrgstatic bool 597ec681f3Smrgvn_ring_ge_seqno(const struct vn_ring *ring, uint32_t a, uint32_t b) 607ec681f3Smrg{ 617ec681f3Smrg /* this can return false negative when not called fast enough (e.g., when 627ec681f3Smrg * called once every couple hours), but following calls with larger a's 637ec681f3Smrg * will correct itself 647ec681f3Smrg * 657ec681f3Smrg * TODO use real seqnos? 667ec681f3Smrg */ 677ec681f3Smrg if (a >= b) 687ec681f3Smrg return ring->cur >= a || ring->cur < b; 697ec681f3Smrg else 707ec681f3Smrg return ring->cur >= a && ring->cur < b; 717ec681f3Smrg} 727ec681f3Smrg 737ec681f3Smrgstatic void 747ec681f3Smrgvn_ring_retire_submits(struct vn_ring *ring, uint32_t seqno) 757ec681f3Smrg{ 767ec681f3Smrg list_for_each_entry_safe(struct vn_ring_submit, submit, &ring->submits, 777ec681f3Smrg head) { 787ec681f3Smrg if (!vn_ring_ge_seqno(ring, seqno, submit->seqno)) 797ec681f3Smrg break; 807ec681f3Smrg 817ec681f3Smrg for (uint32_t i = 0; i < submit->shmem_count; i++) 827ec681f3Smrg vn_renderer_shmem_unref(ring->renderer, submit->shmems[i]); 837ec681f3Smrg 847ec681f3Smrg list_del(&submit->head); 857ec681f3Smrg list_add(&submit->head, &ring->free_submits); 867ec681f3Smrg } 877ec681f3Smrg} 887ec681f3Smrg 897ec681f3Smrgstatic uint32_t 907ec681f3Smrgvn_ring_wait_seqno(const struct vn_ring *ring, uint32_t seqno) 917ec681f3Smrg{ 927ec681f3Smrg /* A renderer wait incurs several hops and the renderer might poll 937ec681f3Smrg * repeatedly anyway. Let's just poll here. 947ec681f3Smrg */ 957ec681f3Smrg uint32_t iter = 0; 967ec681f3Smrg do { 977ec681f3Smrg const uint32_t head = vn_ring_load_head(ring); 987ec681f3Smrg if (vn_ring_ge_seqno(ring, head, seqno)) 997ec681f3Smrg return head; 1007ec681f3Smrg vn_relax(&iter, "ring seqno"); 1017ec681f3Smrg } while (true); 1027ec681f3Smrg} 1037ec681f3Smrg 1047ec681f3Smrgstatic uint32_t 1057ec681f3Smrgvn_ring_wait_space(const struct vn_ring *ring, uint32_t size) 1067ec681f3Smrg{ 1077ec681f3Smrg assert(size <= ring->buffer_size); 1087ec681f3Smrg 1097ec681f3Smrg /* see the reasoning in vn_ring_wait_seqno */ 1107ec681f3Smrg uint32_t iter = 0; 1117ec681f3Smrg do { 1127ec681f3Smrg const uint32_t head = vn_ring_load_head(ring); 1137ec681f3Smrg if (ring->cur + size - head <= ring->buffer_size) 1147ec681f3Smrg return head; 1157ec681f3Smrg vn_relax(&iter, "ring space"); 1167ec681f3Smrg } while (true); 1177ec681f3Smrg} 1187ec681f3Smrg 1197ec681f3Smrgvoid 1207ec681f3Smrgvn_ring_get_layout(size_t buf_size, 1217ec681f3Smrg size_t extra_size, 1227ec681f3Smrg struct vn_ring_layout *layout) 1237ec681f3Smrg{ 1247ec681f3Smrg /* this can be changed/extended quite freely */ 1257ec681f3Smrg struct layout { 1267ec681f3Smrg uint32_t head __attribute__((aligned(64))); 1277ec681f3Smrg uint32_t tail __attribute__((aligned(64))); 1287ec681f3Smrg uint32_t status __attribute__((aligned(64))); 1297ec681f3Smrg 1307ec681f3Smrg uint8_t buffer[] __attribute__((aligned(64))); 1317ec681f3Smrg }; 1327ec681f3Smrg 1337ec681f3Smrg assert(buf_size && util_is_power_of_two_or_zero(buf_size)); 1347ec681f3Smrg 1357ec681f3Smrg layout->head_offset = offsetof(struct layout, head); 1367ec681f3Smrg layout->tail_offset = offsetof(struct layout, tail); 1377ec681f3Smrg layout->status_offset = offsetof(struct layout, status); 1387ec681f3Smrg 1397ec681f3Smrg layout->buffer_offset = offsetof(struct layout, buffer); 1407ec681f3Smrg layout->buffer_size = buf_size; 1417ec681f3Smrg 1427ec681f3Smrg layout->extra_offset = layout->buffer_offset + layout->buffer_size; 1437ec681f3Smrg layout->extra_size = extra_size; 1447ec681f3Smrg 1457ec681f3Smrg layout->shmem_size = layout->extra_offset + layout->extra_size; 1467ec681f3Smrg} 1477ec681f3Smrg 1487ec681f3Smrgvoid 1497ec681f3Smrgvn_ring_init(struct vn_ring *ring, 1507ec681f3Smrg struct vn_renderer *renderer, 1517ec681f3Smrg const struct vn_ring_layout *layout, 1527ec681f3Smrg void *shared) 1537ec681f3Smrg{ 1547ec681f3Smrg memset(ring, 0, sizeof(*ring)); 1557ec681f3Smrg memset(shared, 0, layout->shmem_size); 1567ec681f3Smrg 1577ec681f3Smrg ring->renderer = renderer; 1587ec681f3Smrg 1597ec681f3Smrg assert(layout->buffer_size && 1607ec681f3Smrg util_is_power_of_two_or_zero(layout->buffer_size)); 1617ec681f3Smrg ring->buffer_size = layout->buffer_size; 1627ec681f3Smrg ring->buffer_mask = ring->buffer_size - 1; 1637ec681f3Smrg 1647ec681f3Smrg ring->shared.head = shared + layout->head_offset; 1657ec681f3Smrg ring->shared.tail = shared + layout->tail_offset; 1667ec681f3Smrg ring->shared.status = shared + layout->status_offset; 1677ec681f3Smrg ring->shared.buffer = shared + layout->buffer_offset; 1687ec681f3Smrg ring->shared.extra = shared + layout->extra_offset; 1697ec681f3Smrg 1707ec681f3Smrg list_inithead(&ring->submits); 1717ec681f3Smrg list_inithead(&ring->free_submits); 1727ec681f3Smrg} 1737ec681f3Smrg 1747ec681f3Smrgvoid 1757ec681f3Smrgvn_ring_fini(struct vn_ring *ring) 1767ec681f3Smrg{ 1777ec681f3Smrg vn_ring_retire_submits(ring, ring->cur); 1787ec681f3Smrg assert(list_is_empty(&ring->submits)); 1797ec681f3Smrg 1807ec681f3Smrg list_for_each_entry_safe(struct vn_ring_submit, submit, 1817ec681f3Smrg &ring->free_submits, head) 1827ec681f3Smrg free(submit); 1837ec681f3Smrg} 1847ec681f3Smrg 1857ec681f3Smrgstruct vn_ring_submit * 1867ec681f3Smrgvn_ring_get_submit(struct vn_ring *ring, uint32_t shmem_count) 1877ec681f3Smrg{ 1887ec681f3Smrg const uint32_t min_shmem_count = 2; 1897ec681f3Smrg struct vn_ring_submit *submit; 1907ec681f3Smrg 1917ec681f3Smrg /* TODO this could be simplified if we could omit shmem_count */ 1927ec681f3Smrg if (shmem_count <= min_shmem_count && 1937ec681f3Smrg !list_is_empty(&ring->free_submits)) { 1947ec681f3Smrg submit = 1957ec681f3Smrg list_first_entry(&ring->free_submits, struct vn_ring_submit, head); 1967ec681f3Smrg list_del(&submit->head); 1977ec681f3Smrg } else { 1987ec681f3Smrg shmem_count = MAX2(shmem_count, min_shmem_count); 1997ec681f3Smrg submit = 2007ec681f3Smrg malloc(sizeof(*submit) + sizeof(submit->shmems[0]) * shmem_count); 2017ec681f3Smrg } 2027ec681f3Smrg 2037ec681f3Smrg return submit; 2047ec681f3Smrg} 2057ec681f3Smrg 2067ec681f3Smrgbool 2077ec681f3Smrgvn_ring_submit(struct vn_ring *ring, 2087ec681f3Smrg struct vn_ring_submit *submit, 2097ec681f3Smrg const struct vn_cs_encoder *cs, 2107ec681f3Smrg uint32_t *seqno) 2117ec681f3Smrg{ 2127ec681f3Smrg /* write cs to the ring */ 2137ec681f3Smrg assert(!vn_cs_encoder_is_empty(cs)); 2147ec681f3Smrg uint32_t cur_seqno; 2157ec681f3Smrg for (uint32_t i = 0; i < cs->buffer_count; i++) { 2167ec681f3Smrg const struct vn_cs_encoder_buffer *buf = &cs->buffers[i]; 2177ec681f3Smrg cur_seqno = vn_ring_wait_space(ring, buf->committed_size); 2187ec681f3Smrg vn_ring_write_buffer(ring, buf->base, buf->committed_size); 2197ec681f3Smrg } 2207ec681f3Smrg 2217ec681f3Smrg vn_ring_store_tail(ring); 2227ec681f3Smrg const bool notify = vn_ring_load_status(ring) & VN_RING_STATUS_IDLE; 2237ec681f3Smrg 2247ec681f3Smrg vn_ring_retire_submits(ring, cur_seqno); 2257ec681f3Smrg 2267ec681f3Smrg submit->seqno = ring->cur; 2277ec681f3Smrg list_addtail(&submit->head, &ring->submits); 2287ec681f3Smrg 2297ec681f3Smrg *seqno = submit->seqno; 2307ec681f3Smrg return notify; 2317ec681f3Smrg} 2327ec681f3Smrg 2337ec681f3Smrg/** 2347ec681f3Smrg * This is thread-safe. 2357ec681f3Smrg */ 2367ec681f3Smrgvoid 2377ec681f3Smrgvn_ring_wait(const struct vn_ring *ring, uint32_t seqno) 2387ec681f3Smrg{ 2397ec681f3Smrg vn_ring_wait_seqno(ring, seqno); 2407ec681f3Smrg} 241