tu_cs.h revision 7ec681f3
/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
22 */ 23#ifndef TU_CS_H 24#define TU_CS_H 25 26#include "tu_private.h" 27 28#include "adreno_pm4.xml.h" 29 30#include "freedreno_pm4.h" 31 32void 33tu_cs_init(struct tu_cs *cs, 34 struct tu_device *device, 35 enum tu_cs_mode mode, 36 uint32_t initial_size); 37 38void 39tu_cs_init_external(struct tu_cs *cs, struct tu_device *device, 40 uint32_t *start, uint32_t *end); 41 42void 43tu_cs_finish(struct tu_cs *cs); 44 45void 46tu_cs_begin(struct tu_cs *cs); 47 48void 49tu_cs_end(struct tu_cs *cs); 50 51VkResult 52tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs); 53 54VkResult 55tu_cs_alloc(struct tu_cs *cs, 56 uint32_t count, 57 uint32_t size, 58 struct tu_cs_memory *memory); 59 60struct tu_cs_entry 61tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs); 62 63static inline struct tu_draw_state 64tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs) 65{ 66 struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs); 67 return (struct tu_draw_state) { 68 .iova = entry.bo->iova + entry.offset, 69 .size = entry.size / sizeof(uint32_t), 70 }; 71} 72 73VkResult 74tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size); 75 76static inline struct tu_draw_state 77tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size) 78{ 79 struct tu_cs_memory memory; 80 81 /* TODO: clean this up */ 82 tu_cs_alloc(sub_cs, size, 1, &memory); 83 tu_cs_init_external(cs, sub_cs->device, memory.map, memory.map + size); 84 tu_cs_begin(cs); 85 tu_cs_reserve_space(cs, size); 86 87 return (struct tu_draw_state) { 88 .iova = memory.iova, 89 .size = size, 90 }; 91} 92 93void 94tu_cs_reset(struct tu_cs *cs); 95 96VkResult 97tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target); 98 99/** 100 * Get the size of the command packets emitted since the last call to 101 * tu_cs_add_entry. 
102 */ 103static inline uint32_t 104tu_cs_get_size(const struct tu_cs *cs) 105{ 106 return cs->cur - cs->start; 107} 108 109/** 110 * Return true if there is no command packet emitted since the last call to 111 * tu_cs_add_entry. 112 */ 113static inline uint32_t 114tu_cs_is_empty(const struct tu_cs *cs) 115{ 116 return tu_cs_get_size(cs) == 0; 117} 118 119/** 120 * Discard all entries. This allows \a cs to be reused while keeping the 121 * existing BOs and command packets intact. 122 */ 123static inline void 124tu_cs_discard_entries(struct tu_cs *cs) 125{ 126 assert(cs->mode == TU_CS_MODE_GROW); 127 cs->entry_count = 0; 128} 129 130/** 131 * Get the size needed for tu_cs_emit_call. 132 */ 133static inline uint32_t 134tu_cs_get_call_size(const struct tu_cs *cs) 135{ 136 assert(cs->mode == TU_CS_MODE_GROW); 137 /* each CP_INDIRECT_BUFFER needs 4 dwords */ 138 return cs->entry_count * 4; 139} 140 141/** 142 * Assert that we did not exceed the reserved space. 143 */ 144static inline void 145tu_cs_sanity_check(const struct tu_cs *cs) 146{ 147 assert(cs->start <= cs->cur); 148 assert(cs->cur <= cs->reserved_end); 149 assert(cs->reserved_end <= cs->end); 150} 151 152/** 153 * Emit a uint32_t value into a command stream, without boundary checking. 154 */ 155static inline void 156tu_cs_emit(struct tu_cs *cs, uint32_t value) 157{ 158 assert(cs->cur < cs->reserved_end); 159 *cs->cur = value; 160 ++cs->cur; 161} 162 163/** 164 * Emit an array of uint32_t into a command stream, without boundary checking. 165 */ 166static inline void 167tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length) 168{ 169 assert(cs->cur + length <= cs->reserved_end); 170 memcpy(cs->cur, values, sizeof(uint32_t) * length); 171 cs->cur += length; 172} 173 174/** 175 * Get the size of the remaining space in the current BO. 
176 */ 177static inline uint32_t 178tu_cs_get_space(const struct tu_cs *cs) 179{ 180 return cs->end - cs->cur; 181} 182 183static inline void 184tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size) 185{ 186 if (cs->mode != TU_CS_MODE_GROW) { 187 assert(tu_cs_get_space(cs) >= reserved_size); 188 assert(cs->reserved_end == cs->end); 189 return; 190 } 191 192 if (tu_cs_get_space(cs) >= reserved_size && 193 cs->entry_count < cs->entry_capacity) { 194 cs->reserved_end = cs->cur + reserved_size; 195 return; 196 } 197 198 ASSERTED VkResult result = tu_cs_reserve_space(cs, reserved_size); 199 /* TODO: set this error in tu_cs and use it */ 200 assert(result == VK_SUCCESS); 201} 202 203/** 204 * Emit a type-4 command packet header into a command stream. 205 */ 206static inline void 207tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt) 208{ 209 tu_cs_reserve(cs, cnt + 1); 210 tu_cs_emit(cs, pm4_pkt4_hdr(regindx, cnt)); 211} 212 213/** 214 * Emit a type-7 command packet header into a command stream. 215 */ 216static inline void 217tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) 218{ 219 tu_cs_reserve(cs, cnt + 1); 220 tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt)); 221} 222 223static inline void 224tu_cs_emit_wfi(struct tu_cs *cs) 225{ 226 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0); 227} 228 229static inline void 230tu_cs_emit_qw(struct tu_cs *cs, uint64_t value) 231{ 232 tu_cs_emit(cs, (uint32_t) value); 233 tu_cs_emit(cs, (uint32_t) (value >> 32)); 234} 235 236static inline void 237tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value) 238{ 239 tu_cs_emit_pkt4(cs, reg, 1); 240 tu_cs_emit(cs, value); 241} 242 243/** 244 * Emit a CP_INDIRECT_BUFFER command packet. 
245 */ 246static inline void 247tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry) 248{ 249 assert(entry->bo); 250 assert(entry->size && entry->offset + entry->size <= entry->bo->size); 251 assert(entry->size % sizeof(uint32_t) == 0); 252 assert(entry->offset % sizeof(uint32_t) == 0); 253 254 tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3); 255 tu_cs_emit_qw(cs, entry->bo->iova + entry->offset); 256 tu_cs_emit(cs, entry->size / sizeof(uint32_t)); 257} 258 259/* for compute which isn't using SET_DRAW_STATE */ 260static inline void 261tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state) 262{ 263 if (state.size) { 264 tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3); 265 tu_cs_emit_qw(cs, state.iova); 266 tu_cs_emit(cs, state.size); 267 } 268} 269 270/** 271 * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target 272 * command stream. 273 */ 274static inline void 275tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target) 276{ 277 assert(target->mode == TU_CS_MODE_GROW); 278 for (uint32_t i = 0; i < target->entry_count; i++) 279 tu_cs_emit_ib(cs, target->entries + i); 280} 281 282/* Helpers for bracketing a large sequence of commands of unknown size inside 283 * a CP_COND_REG_EXEC packet. 
284 */ 285static inline void 286tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags) 287{ 288 assert(cs->mode == TU_CS_MODE_GROW); 289 assert(!cs->cond_flags && cond_flags); 290 291 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); 292 tu_cs_emit(cs, cond_flags); 293 294 cs->cond_flags = cond_flags; 295 cs->cond_dwords = cs->cur; 296 297 /* Emit dummy DWORD field here */ 298 tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0)); 299} 300#define CP_COND_EXEC_0_RENDER_MODE_GMEM \ 301 (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM) 302#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM \ 303 (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM) 304 305static inline void 306tu_cond_exec_end(struct tu_cs *cs) 307{ 308 assert(cs->cond_flags); 309 310 cs->cond_flags = 0; 311 /* Subtract one here to account for the DWORD field itself. */ 312 *cs->cond_dwords = cs->cur - cs->cond_dwords - 1; 313} 314 315#define fd_reg_pair tu_reg_value 316#define __bo_type struct tu_bo * 317 318#include "a6xx.xml.h" 319#include "a6xx-pack.xml.h" 320 321#define __assert_eq(a, b) \ 322 do { \ 323 if ((a) != (b)) { \ 324 fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \ 325 assert((a) == (b)); \ 326 } \ 327 } while (0) 328 329#define __ONE_REG(i, regs) \ 330 do { \ 331 if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) { \ 332 __assert_eq(regs[0].reg + i, regs[i].reg); \ 333 if (regs[i].bo) { \ 334 uint64_t v = regs[i].bo->iova + regs[i].bo_offset; \ 335 v >>= regs[i].bo_shift; \ 336 v |= regs[i].value; \ 337 \ 338 *p++ = v; \ 339 *p++ = v >> 32; \ 340 } else { \ 341 *p++ = regs[i].value; \ 342 if (regs[i].is_address) \ 343 *p++ = regs[i].value >> 32; \ 344 } \ 345 } \ 346 } while (0) 347 348/* Emits a sequence of register writes in order using a pkt4. This will check 349 * (at runtime on a !NDEBUG build) that the registers were actually set up in 350 * order in the code. 
351 * 352 * Note that references to buffers aren't automatically added to the CS, 353 * unlike in freedreno. We are clever in various places to avoid duplicating 354 * the reference add work. 355 * 356 * Also, 64-bit address registers don't have a way (currently) to set a 64-bit 357 * address without having a reference to a BO, since the .dword field in the 358 * register's struct is only 32-bit wide. We should fix this in the pack 359 * codegen later. 360 */ 361#define tu_cs_emit_regs(cs, ...) do { \ 362 const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \ 363 unsigned count = ARRAY_SIZE(regs); \ 364 \ 365 STATIC_ASSERT(count > 0); \ 366 STATIC_ASSERT(count <= 16); \ 367 \ 368 tu_cs_emit_pkt4((cs), regs[0].reg, count); \ 369 uint32_t *p = (cs)->cur; \ 370 __ONE_REG( 0, regs); \ 371 __ONE_REG( 1, regs); \ 372 __ONE_REG( 2, regs); \ 373 __ONE_REG( 3, regs); \ 374 __ONE_REG( 4, regs); \ 375 __ONE_REG( 5, regs); \ 376 __ONE_REG( 6, regs); \ 377 __ONE_REG( 7, regs); \ 378 __ONE_REG( 8, regs); \ 379 __ONE_REG( 9, regs); \ 380 __ONE_REG(10, regs); \ 381 __ONE_REG(11, regs); \ 382 __ONE_REG(12, regs); \ 383 __ONE_REG(13, regs); \ 384 __ONE_REG(14, regs); \ 385 __ONE_REG(15, regs); \ 386 (cs)->cur = p; \ 387 } while (0) 388 389#endif /* TU_CS_H */ 390