tu_cs.h revision 7ec681f3
1/*
2 * Copyright © 2019 Google LLC
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23#ifndef TU_CS_H
24#define TU_CS_H
25
26#include "tu_private.h"
27
28#include "adreno_pm4.xml.h"
29
30#include "freedreno_pm4.h"
31
32void
33tu_cs_init(struct tu_cs *cs,
34           struct tu_device *device,
35           enum tu_cs_mode mode,
36           uint32_t initial_size);
37
38void
39tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
40                    uint32_t *start, uint32_t *end);
41
42void
43tu_cs_finish(struct tu_cs *cs);
44
45void
46tu_cs_begin(struct tu_cs *cs);
47
48void
49tu_cs_end(struct tu_cs *cs);
50
51VkResult
52tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);
53
54VkResult
55tu_cs_alloc(struct tu_cs *cs,
56            uint32_t count,
57            uint32_t size,
58            struct tu_cs_memory *memory);
59
60struct tu_cs_entry
61tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
62
63static inline struct tu_draw_state
64tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs)
65{
66   struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs);
67   return (struct tu_draw_state) {
68      .iova = entry.bo->iova + entry.offset,
69      .size = entry.size / sizeof(uint32_t),
70   };
71}
72
73VkResult
74tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);
75
76static inline struct tu_draw_state
77tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size)
78{
79   struct tu_cs_memory memory;
80
81   /* TODO: clean this up */
82   tu_cs_alloc(sub_cs, size, 1, &memory);
83   tu_cs_init_external(cs, sub_cs->device, memory.map, memory.map + size);
84   tu_cs_begin(cs);
85   tu_cs_reserve_space(cs, size);
86
87   return (struct tu_draw_state) {
88      .iova = memory.iova,
89      .size = size,
90   };
91}
92
93void
94tu_cs_reset(struct tu_cs *cs);
95
96VkResult
97tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
98
99/**
100 * Get the size of the command packets emitted since the last call to
101 * tu_cs_add_entry.
102 */
103static inline uint32_t
104tu_cs_get_size(const struct tu_cs *cs)
105{
106   return cs->cur - cs->start;
107}
108
/**
 * Return non-zero when tu_cs_get_size() reports zero for \a cs, i.e. when
 * nothing lies between cs->start and cs->cur.
 */
static inline uint32_t
tu_cs_is_empty(const struct tu_cs *cs)
{
   const uint32_t written = tu_cs_get_size(cs);

   return written == 0;
}
118
119/**
120 * Discard all entries.  This allows \a cs to be reused while keeping the
121 * existing BOs and command packets intact.
122 */
123static inline void
124tu_cs_discard_entries(struct tu_cs *cs)
125{
126   assert(cs->mode == TU_CS_MODE_GROW);
127   cs->entry_count = 0;
128}
129
130/**
131 * Get the size needed for tu_cs_emit_call.
132 */
133static inline uint32_t
134tu_cs_get_call_size(const struct tu_cs *cs)
135{
136   assert(cs->mode == TU_CS_MODE_GROW);
137   /* each CP_INDIRECT_BUFFER needs 4 dwords */
138   return cs->entry_count * 4;
139}
140
141/**
142 * Assert that we did not exceed the reserved space.
143 */
144static inline void
145tu_cs_sanity_check(const struct tu_cs *cs)
146{
147   assert(cs->start <= cs->cur);
148   assert(cs->cur <= cs->reserved_end);
149   assert(cs->reserved_end <= cs->end);
150}
151
152/**
153 * Emit a uint32_t value into a command stream, without boundary checking.
154 */
155static inline void
156tu_cs_emit(struct tu_cs *cs, uint32_t value)
157{
158   assert(cs->cur < cs->reserved_end);
159   *cs->cur = value;
160   ++cs->cur;
161}
162
163/**
164 * Emit an array of uint32_t into a command stream, without boundary checking.
165 */
166static inline void
167tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
168{
169   assert(cs->cur + length <= cs->reserved_end);
170   memcpy(cs->cur, values, sizeof(uint32_t) * length);
171   cs->cur += length;
172}
173
174/**
175 * Get the size of the remaining space in the current BO.
176 */
177static inline uint32_t
178tu_cs_get_space(const struct tu_cs *cs)
179{
180   return cs->end - cs->cur;
181}
182
183static inline void
184tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size)
185{
186   if (cs->mode != TU_CS_MODE_GROW) {
187      assert(tu_cs_get_space(cs) >= reserved_size);
188      assert(cs->reserved_end == cs->end);
189      return;
190   }
191
192   if (tu_cs_get_space(cs) >= reserved_size &&
193       cs->entry_count < cs->entry_capacity) {
194      cs->reserved_end = cs->cur + reserved_size;
195      return;
196   }
197
198   ASSERTED VkResult result = tu_cs_reserve_space(cs, reserved_size);
199   /* TODO: set this error in tu_cs and use it */
200   assert(result == VK_SUCCESS);
201}
202
/**
 * Start a type-4 packet: reserve room for the header plus \a cnt payload
 * dwords, then emit the header built by pm4_pkt4_hdr().  The caller emits the
 * payload.
 */
static inline void
tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
{
   /* one extra dword for the header itself */
   tu_cs_reserve(cs, cnt + 1);

   const uint32_t header = pm4_pkt4_hdr(regindx, cnt);
   tu_cs_emit(cs, header);
}
212
/**
 * Start a type-7 packet: reserve room for the header plus \a cnt payload
 * dwords, then emit the header built by pm4_pkt7_hdr().  The caller emits the
 * payload.
 */
static inline void
tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
   /* one extra dword for the header itself */
   tu_cs_reserve(cs, cnt + 1);

   const uint32_t header = pm4_pkt7_hdr(opcode, cnt);
   tu_cs_emit(cs, header);
}
222
223static inline void
224tu_cs_emit_wfi(struct tu_cs *cs)
225{
226   tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
227}
228
/**
 * Emit a 64-bit value as two dwords, low 32 bits first.  Like tu_cs_emit(),
 * this does not reserve space.
 */
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
   const uint32_t lo = (uint32_t) value;
   const uint32_t hi = (uint32_t) (value >> 32);

   tu_cs_emit(cs, lo);
   tu_cs_emit(cs, hi);
}
235
/**
 * Write a single register: a type-4 packet header for \a reg with a payload
 * count of one, followed by \a value.
 */
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
   tu_cs_emit_pkt4(cs, reg, 1);
   tu_cs_emit(cs, value);
}
242
243/**
244 * Emit a CP_INDIRECT_BUFFER command packet.
245 */
246static inline void
247tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
248{
249   assert(entry->bo);
250   assert(entry->size && entry->offset + entry->size <= entry->bo->size);
251   assert(entry->size % sizeof(uint32_t) == 0);
252   assert(entry->offset % sizeof(uint32_t) == 0);
253
254   tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
255   tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
256   tu_cs_emit(cs, entry->size / sizeof(uint32_t));
257}
258
259/* for compute which isn't using SET_DRAW_STATE */
260static inline void
261tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state)
262{
263   if (state.size) {
264      tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
265      tu_cs_emit_qw(cs, state.iova);
266      tu_cs_emit(cs, state.size);
267   }
268}
269
270/**
271 * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
272 * command stream.
273 */
274static inline void
275tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
276{
277   assert(target->mode == TU_CS_MODE_GROW);
278   for (uint32_t i = 0; i < target->entry_count; i++)
279      tu_cs_emit_ib(cs, target->entries + i);
280}
281
282/* Helpers for bracketing a large sequence of commands of unknown size inside
283 * a CP_COND_REG_EXEC packet.
284 */
285static inline void
286tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags)
287{
288   assert(cs->mode == TU_CS_MODE_GROW);
289   assert(!cs->cond_flags && cond_flags);
290
291   tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
292   tu_cs_emit(cs, cond_flags);
293
294   cs->cond_flags = cond_flags;
295   cs->cond_dwords = cs->cur;
296
297   /* Emit dummy DWORD field here */
298   tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
299}
/* Ready-made flag words combining CP_COND_REG_EXEC_0_MODE(RENDER_MODE) with
 * the GMEM or the SYSMEM bit.
 */
#define CP_COND_EXEC_0_RENDER_MODE_GMEM      \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) |   \
    CP_COND_REG_EXEC_0_GMEM)
#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM    \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) |   \
    CP_COND_REG_EXEC_0_SYSMEM)
304
305static inline void
306tu_cond_exec_end(struct tu_cs *cs)
307{
308   assert(cs->cond_flags);
309
310   cs->cond_flags = 0;
311   /* Subtract one here to account for the DWORD field itself. */
312   *cs->cond_dwords = cs->cur - cs->cond_dwords - 1;
313}
314
315#define fd_reg_pair tu_reg_value
316#define __bo_type struct tu_bo *
317
318#include "a6xx.xml.h"
319#include "a6xx-pack.xml.h"
320
/* Debug helper: when (a) != (b), print both expressions and their values in
 * hex to stderr, then trip assert() on the same comparison.
 *
 * Both operands are evaluated more than once, so they must not have side
 * effects.  The values are converted to unsigned for printing so that the
 * arguments always match the %x conversions, whatever integer type the
 * operands have.
 */
#define __assert_eq(a, b)                                               \
   do {                                                                 \
      if ((a) != (b)) {                                                 \
         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", \
                 (unsigned) (a), (unsigned) (b));                       \
         assert((a) == (b));                                            \
      }                                                                 \
   } while (0)
328
/* Write the dword(s) for regs[i] through the pointer `p`, which must be
 * declared by the code expanding this macro.
 *
 * Slots beyond the end of the array and slots whose .reg is 0 are skipped.
 * For every other slot the register offset is checked to be regs[0].reg + i.
 * A slot with a BO emits two dwords: the low and high halves of
 * ((bo->iova + bo_offset) >> bo_shift) | value.  A slot without a BO emits
 * .value, followed by .value >> 32 when .is_address is set.
 */
#define __ONE_REG(i, regs)                                          \
   do {                                                             \
      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {                \
         __assert_eq(regs[0].reg + i, regs[i].reg);                 \
         if (!regs[i].bo) {                                         \
            *p++ = regs[i].value;                                   \
            if (regs[i].is_address)                                 \
               *p++ = regs[i].value >> 32;                          \
         } else {                                                   \
            uint64_t addr = regs[i].bo->iova + regs[i].bo_offset;   \
            addr >>= regs[i].bo_shift;                              \
            addr |= regs[i].value;                                  \
                                                                    \
            *p++ = addr;                                            \
            *p++ = addr >> 32;                                      \
         }                                                          \
      }                                                             \
   } while (0)
347
/* Emits a sequence of register writes in order using a pkt4.  This will check
 * (at runtime on a !NDEBUG build) that the registers were actually set up in
 * order in the code.
 *
 * Between 1 and 16 register values may be passed.  The limit is checked at
 * compile time against ARRAY_SIZE(regs), which is a constant expression; the
 * local `count` is not, so it cannot be used for a static assertion.  Only
 * slots 0..15 are expanded below, so anything beyond that would be silently
 * dropped.
 *
 * Note that references to buffers aren't automatically added to the CS,
 * unlike in freedreno.  We are clever in various places to avoid duplicating
 * the reference add work.
 *
 * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
 * address without having a reference to a BO, since the .dword field in the
 * register's struct is only 32-bit wide.  We should fix this in the pack
 * codegen later.
 */
#define tu_cs_emit_regs(cs, ...) do {                   \
   const struct fd_reg_pair regs[] = { __VA_ARGS__ };   \
   unsigned count = ARRAY_SIZE(regs);                   \
                                                        \
   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                 \
   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);               \
                                                        \
   tu_cs_emit_pkt4((cs), regs[0].reg, count);           \
   uint32_t *p = (cs)->cur;                             \
   __ONE_REG( 0, regs);                                 \
   __ONE_REG( 1, regs);                                 \
   __ONE_REG( 2, regs);                                 \
   __ONE_REG( 3, regs);                                 \
   __ONE_REG( 4, regs);                                 \
   __ONE_REG( 5, regs);                                 \
   __ONE_REG( 6, regs);                                 \
   __ONE_REG( 7, regs);                                 \
   __ONE_REG( 8, regs);                                 \
   __ONE_REG( 9, regs);                                 \
   __ONE_REG(10, regs);                                 \
   __ONE_REG(11, regs);                                 \
   __ONE_REG(12, regs);                                 \
   __ONE_REG(13, regs);                                 \
   __ONE_REG(14, regs);                                 \
   __ONE_REG(15, regs);                                 \
   (cs)->cur = p;                                       \
   } while (0)
388
389#endif /* TU_CS_H */
390