1/*
2 * Copyright © 2019 Google LLC
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "tu_cs.h"
25
26/**
27 * Initialize a command stream.
28 */
29void
30tu_cs_init(struct tu_cs *cs,
31           struct tu_device *device,
32           enum tu_cs_mode mode,
33           uint32_t initial_size)
34{
35   assert(mode != TU_CS_MODE_EXTERNAL);
36
37   memset(cs, 0, sizeof(*cs));
38
39   cs->device = device;
40   cs->mode = mode;
41   cs->next_bo_size = initial_size;
42}
43
44/**
45 * Initialize a command stream as a wrapper to an external buffer.
46 */
47void
48tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
49                    uint32_t *start, uint32_t *end)
50{
51   memset(cs, 0, sizeof(*cs));
52
53   cs->device = device;
54   cs->mode = TU_CS_MODE_EXTERNAL;
55   cs->start = cs->reserved_end = cs->cur = start;
56   cs->end = end;
57}
58
59/**
60 * Finish and release all resources owned by a command stream.
61 */
62void
63tu_cs_finish(struct tu_cs *cs)
64{
65   for (uint32_t i = 0; i < cs->bo_count; ++i) {
66      tu_bo_finish(cs->device, cs->bos[i]);
67      free(cs->bos[i]);
68   }
69
70   free(cs->entries);
71   free(cs->bos);
72}
73
74/**
75 * Get the offset of the command packets emitted since the last call to
76 * tu_cs_add_entry.
77 */
78static uint32_t
79tu_cs_get_offset(const struct tu_cs *cs)
80{
81   assert(cs->bo_count);
82   return cs->start - (uint32_t *) cs->bos[cs->bo_count - 1]->map;
83}
84
85/*
86 * Allocate and add a BO to a command stream.  Following command packets will
87 * be emitted to the new BO.
88 */
89static VkResult
90tu_cs_add_bo(struct tu_cs *cs, uint32_t size)
91{
92   /* no BO for TU_CS_MODE_EXTERNAL */
93   assert(cs->mode != TU_CS_MODE_EXTERNAL);
94
95   /* no dangling command packet */
96   assert(tu_cs_is_empty(cs));
97
98   /* grow cs->bos if needed */
99   if (cs->bo_count == cs->bo_capacity) {
100      uint32_t new_capacity = MAX2(4, 2 * cs->bo_capacity);
101      struct tu_bo **new_bos =
102         realloc(cs->bos, new_capacity * sizeof(struct tu_bo *));
103      if (!new_bos)
104         return VK_ERROR_OUT_OF_HOST_MEMORY;
105
106      cs->bo_capacity = new_capacity;
107      cs->bos = new_bos;
108   }
109
110   struct tu_bo *new_bo = malloc(sizeof(struct tu_bo));
111   if (!new_bo)
112      return VK_ERROR_OUT_OF_HOST_MEMORY;
113
114   VkResult result =
115      tu_bo_init_new(cs->device, new_bo, size * sizeof(uint32_t),
116                     TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
117   if (result != VK_SUCCESS) {
118      free(new_bo);
119      return result;
120   }
121
122   result = tu_bo_map(cs->device, new_bo);
123   if (result != VK_SUCCESS) {
124      tu_bo_finish(cs->device, new_bo);
125      free(new_bo);
126      return result;
127   }
128
129   cs->bos[cs->bo_count++] = new_bo;
130
131   cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map;
132   cs->end = cs->start + new_bo->size / sizeof(uint32_t);
133
134   return VK_SUCCESS;
135}
136
137/**
138 * Reserve an IB entry.
139 */
140static VkResult
141tu_cs_reserve_entry(struct tu_cs *cs)
142{
143   /* entries are only for TU_CS_MODE_GROW */
144   assert(cs->mode == TU_CS_MODE_GROW);
145
146   /* grow cs->entries if needed */
147   if (cs->entry_count == cs->entry_capacity) {
148      uint32_t new_capacity = MAX2(4, cs->entry_capacity * 2);
149      struct tu_cs_entry *new_entries =
150         realloc(cs->entries, new_capacity * sizeof(struct tu_cs_entry));
151      if (!new_entries)
152         return VK_ERROR_OUT_OF_HOST_MEMORY;
153
154      cs->entry_capacity = new_capacity;
155      cs->entries = new_entries;
156   }
157
158   return VK_SUCCESS;
159}
160
161/**
162 * Add an IB entry for the command packets emitted since the last call to this
163 * function.
164 */
165static void
166tu_cs_add_entry(struct tu_cs *cs)
167{
168   /* entries are only for TU_CS_MODE_GROW */
169   assert(cs->mode == TU_CS_MODE_GROW);
170
171   /* disallow empty entry */
172   assert(!tu_cs_is_empty(cs));
173
174   /*
175    * because we disallow empty entry, tu_cs_add_bo and tu_cs_reserve_entry
176    * must both have been called
177    */
178   assert(cs->bo_count);
179   assert(cs->entry_count < cs->entry_capacity);
180
181   /* add an entry for [cs->start, cs->cur] */
182   cs->entries[cs->entry_count++] = (struct tu_cs_entry) {
183      .bo = cs->bos[cs->bo_count - 1],
184      .size = tu_cs_get_size(cs) * sizeof(uint32_t),
185      .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
186   };
187
188   cs->start = cs->cur;
189}
190
191/**
192 * same behavior as tu_cs_emit_call but without the indirect
193 */
194VkResult
195tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target)
196{
197   VkResult result;
198
199   assert(cs->mode == TU_CS_MODE_GROW);
200   assert(target->mode == TU_CS_MODE_GROW);
201
202   if (!tu_cs_is_empty(cs))
203      tu_cs_add_entry(cs);
204
205   for (unsigned i = 0; i < target->entry_count; i++) {
206      result = tu_cs_reserve_entry(cs);
207      if (result != VK_SUCCESS)
208         return result;
209      cs->entries[cs->entry_count++] = target->entries[i];
210   }
211
212   return VK_SUCCESS;
213}
214
215/**
216 * Begin (or continue) command packet emission.  This does nothing but sanity
217 * checks currently.  \a cs must not be in TU_CS_MODE_SUB_STREAM mode.
218 */
219void
220tu_cs_begin(struct tu_cs *cs)
221{
222   assert(cs->mode != TU_CS_MODE_SUB_STREAM);
223   assert(tu_cs_is_empty(cs));
224}
225
226/**
227 * End command packet emission.  This adds an IB entry when \a cs is in
228 * TU_CS_MODE_GROW mode.
229 */
230void
231tu_cs_end(struct tu_cs *cs)
232{
233   assert(cs->mode != TU_CS_MODE_SUB_STREAM);
234
235   if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs))
236      tu_cs_add_entry(cs);
237}
238
239/**
240 * Begin command packet emission to a sub-stream.  \a cs must be in
241 * TU_CS_MODE_SUB_STREAM mode.
242 *
243 * Return \a sub_cs which is in TU_CS_MODE_EXTERNAL mode.  tu_cs_begin and
244 * tu_cs_reserve_space are implied and \a sub_cs is ready for command packet
245 * emission.
246 */
247VkResult
248tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs)
249{
250   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
251   assert(size);
252
253   VkResult result = tu_cs_reserve_space(cs, size);
254   if (result != VK_SUCCESS)
255      return result;
256
257   tu_cs_init_external(sub_cs, cs->device, cs->cur, cs->reserved_end);
258   tu_cs_begin(sub_cs);
259   result = tu_cs_reserve_space(sub_cs, size);
260   assert(result == VK_SUCCESS);
261
262   return VK_SUCCESS;
263}
264
265/**
266 * Allocate count*size dwords, aligned to size dwords.
267 * \a cs must be in TU_CS_MODE_SUB_STREAM mode.
268 *
269 */
270VkResult
271tu_cs_alloc(struct tu_cs *cs,
272            uint32_t count,
273            uint32_t size,
274            struct tu_cs_memory *memory)
275{
276   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
277   assert(size && size <= 1024);
278
279   if (!count)
280      return VK_SUCCESS;
281
282   /* TODO: smarter way to deal with alignment? */
283
284   VkResult result = tu_cs_reserve_space(cs, count * size + (size-1));
285   if (result != VK_SUCCESS)
286      return result;
287
288   struct tu_bo *bo = cs->bos[cs->bo_count - 1];
289   size_t offset = align(tu_cs_get_offset(cs), size);
290
291   memory->map = bo->map + offset * sizeof(uint32_t);
292   memory->iova = bo->iova + offset * sizeof(uint32_t);
293
294   cs->start = cs->cur = (uint32_t*) bo->map + offset + count * size;
295
296   return VK_SUCCESS;
297}
298
299/**
300 * End command packet emission to a sub-stream.  \a sub_cs becomes invalid
301 * after this call.
302 *
303 * Return an IB entry for the sub-stream.  The entry has the same lifetime as
304 * \a cs.
305 */
306struct tu_cs_entry
307tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs)
308{
309   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
310   assert(cs->bo_count);
311   assert(sub_cs->start == cs->cur && sub_cs->end == cs->reserved_end);
312   tu_cs_sanity_check(sub_cs);
313
314   tu_cs_end(sub_cs);
315
316   cs->cur = sub_cs->cur;
317
318   struct tu_cs_entry entry = {
319      .bo = cs->bos[cs->bo_count - 1],
320      .size = tu_cs_get_size(cs) * sizeof(uint32_t),
321      .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
322   };
323
324   cs->start = cs->cur;
325
326   return entry;
327}
328
329/**
330 * Reserve space from a command stream for \a reserved_size uint32_t values.
331 * This never fails when \a cs has mode TU_CS_MODE_EXTERNAL.
332 */
333VkResult
334tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size)
335{
336   if (tu_cs_get_space(cs) < reserved_size) {
337      if (cs->mode == TU_CS_MODE_EXTERNAL) {
338         unreachable("cannot grow external buffer");
339         return VK_ERROR_OUT_OF_HOST_MEMORY;
340      }
341
342      /* add an entry for the exiting command packets */
343      if (!tu_cs_is_empty(cs)) {
344         /* no direct command packet for TU_CS_MODE_SUB_STREAM */
345         assert(cs->mode != TU_CS_MODE_SUB_STREAM);
346
347         tu_cs_add_entry(cs);
348      }
349
350      if (cs->cond_flags) {
351         /* Subtract one here to account for the DWORD field itself. */
352         *cs->cond_dwords = cs->cur - cs->cond_dwords - 1;
353
354         /* space for CP_COND_REG_EXEC in next bo */
355         reserved_size += 3;
356      }
357
358      /* switch to a new BO */
359      uint32_t new_size = MAX2(cs->next_bo_size, reserved_size);
360      VkResult result = tu_cs_add_bo(cs, new_size);
361      if (result != VK_SUCCESS)
362         return result;
363
364      /* if inside a condition, emit a new CP_COND_REG_EXEC */
365      if (cs->cond_flags) {
366         cs->reserved_end = cs->cur + reserved_size;
367
368         tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
369         tu_cs_emit(cs, cs->cond_flags);
370
371         cs->cond_dwords = cs->cur;
372
373         /* Emit dummy DWORD field here */
374         tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
375      }
376
377      /* double the size for the next bo, also there is an upper
378       * bound on IB size, which appears to be 0x0fffff
379       */
380      new_size = MIN2(new_size << 1, 0x0fffff);
381      if (cs->next_bo_size < new_size)
382         cs->next_bo_size = new_size;
383   }
384
385   assert(tu_cs_get_space(cs) >= reserved_size);
386   cs->reserved_end = cs->cur + reserved_size;
387
388   if (cs->mode == TU_CS_MODE_GROW) {
389      /* reserve an entry for the next call to this function or tu_cs_end */
390      return tu_cs_reserve_entry(cs);
391   }
392
393   return VK_SUCCESS;
394}
395
396/**
397 * Reset a command stream to its initial state.  This discards all comand
398 * packets in \a cs, but does not necessarily release all resources.
399 */
400void
401tu_cs_reset(struct tu_cs *cs)
402{
403   if (cs->mode == TU_CS_MODE_EXTERNAL) {
404      assert(!cs->bo_count && !cs->entry_count);
405      cs->reserved_end = cs->cur = cs->start;
406      return;
407   }
408
409   for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) {
410      tu_bo_finish(cs->device, cs->bos[i]);
411      free(cs->bos[i]);
412   }
413
414   if (cs->bo_count) {
415      cs->bos[0] = cs->bos[cs->bo_count - 1];
416      cs->bo_count = 1;
417
418      cs->start = cs->cur = cs->reserved_end = (uint32_t *) cs->bos[0]->map;
419      cs->end = cs->start + cs->bos[0]->size / sizeof(uint32_t);
420   }
421
422   cs->entry_count = 0;
423}
424