/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "registers/adreno_pm4.xml.h"
#include "registers/adreno_common.xml.h"
#include "registers/a6xx.xml.h"

#include "vk_format.h"

#include "tu_cs.h"

void
tu_bo_list_init(struct tu_bo_list *list)
{
   list->count = list->capacity = 0;
   list->bo_infos = NULL;
}

void
tu_bo_list_destroy(struct tu_bo_list *list)
{
   free(list->bo_infos);
}

void
tu_bo_list_reset(struct tu_bo_list *list)
{
   list->count = 0;
}
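/* The BO list is flattened into the bos array of drm_msm_gem_submit at
 * submit time; any BO the GPU may touch while executing this command
 * buffer must be added here, with its flags accumulated across additions.
 */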
/**
 * \a flags consists of MSM_SUBMIT_BO_FLAGS.
 */
static uint32_t
tu_bo_list_add_info(struct tu_bo_list *list,
                    const struct drm_msm_gem_submit_bo *bo_info)
{
   for (uint32_t i = 0; i < list->count; ++i) {
      if (list->bo_infos[i].handle == bo_info->handle) {
         assert(list->bo_infos[i].presumed == bo_info->presumed);
         list->bo_infos[i].flags |= bo_info->flags;
         return i;
      }
   }

   /* grow list->bo_infos if needed */
   if (list->count == list->capacity) {
      uint32_t new_capacity = MAX2(2 * list->count, 16);
      struct drm_msm_gem_submit_bo *new_bo_infos = realloc(
         list->bo_infos, new_capacity * sizeof(struct drm_msm_gem_submit_bo));
      if (!new_bo_infos)
         return TU_BO_LIST_FAILED;
      list->bo_infos = new_bo_infos;
      list->capacity = new_capacity;
   }

   list->bo_infos[list->count] = *bo_info;
   return list->count++;
}

uint32_t
tu_bo_list_add(struct tu_bo_list *list,
               const struct tu_bo *bo,
               uint32_t flags)
{
   return tu_bo_list_add_info(list, &(struct drm_msm_gem_submit_bo) {
                                       .flags = flags,
                                       .handle = bo->gem_handle,
                                       .presumed = bo->iova,
                                    });
}

VkResult
tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
{
   for (uint32_t i = 0; i < other->count; i++) {
      if (tu_bo_list_add_info(list, other->bo_infos + i) == TU_BO_LIST_FAILED)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   return VK_SUCCESS;
}

static VkResult
tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling,
                                    const struct tu_device *dev)
{
   const uint32_t gmem_size = dev->physical_device->gmem_size;
   uint32_t offset = 0;

   for (uint32_t i = 0; i < tiling->buffer_count; i++) {
      /* 16KB-aligned */
      offset = align(offset, 0x4000);

      tiling->gmem_offsets[i] = offset;
      offset += tiling->tile0.extent.width * tiling->tile0.extent.height *
                tiling->buffer_cpp[i];
   }

   return offset <= gmem_size ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
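/* Tiled (GMEM) rendering: the render area is split into tiles small enough
 * that one tile's worth of every attachment fits in on-chip GMEM at the
 * 16KB-aligned offsets computed above.  The helpers below pick the tile
 * size, then group tiles into visibility pipes.
 */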
static void
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
                                    const struct tu_device *dev)
{
   const uint32_t tile_align_w = dev->physical_device->tile_align_w;
   const uint32_t tile_align_h = dev->physical_device->tile_align_h;
   const uint32_t max_tile_width = 1024; /* A6xx */

   tiling->tile0.offset = (VkOffset2D) {
      .x = tiling->render_area.offset.x & ~(tile_align_w - 1),
      .y = tiling->render_area.offset.y & ~(tile_align_h - 1),
   };

   const uint32_t ra_width =
      tiling->render_area.extent.width +
      (tiling->render_area.offset.x - tiling->tile0.offset.x);
   const uint32_t ra_height =
      tiling->render_area.extent.height +
      (tiling->render_area.offset.y - tiling->tile0.offset.y);

   /* start from 1 tile */
   tiling->tile_count = (VkExtent2D) {
      .width = 1,
      .height = 1,
   };
   tiling->tile0.extent = (VkExtent2D) {
      .width = align(ra_width, tile_align_w),
      .height = align(ra_height, tile_align_h),
   };

   /* do not exceed max tile width */
   while (tiling->tile0.extent.width > max_tile_width) {
      tiling->tile_count.width++;
      tiling->tile0.extent.width =
         align(ra_width / tiling->tile_count.width, tile_align_w);
   }

   /* do not exceed gmem size */
   while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) {
      if (tiling->tile0.extent.width > tiling->tile0.extent.height) {
         tiling->tile_count.width++;
         tiling->tile0.extent.width =
            align(ra_width / tiling->tile_count.width, tile_align_w);
      } else {
         tiling->tile_count.height++;
         tiling->tile0.extent.height =
            align(ra_height / tiling->tile_count.height, tile_align_h);
      }
   }
}

static void
tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
                                    const struct tu_device *dev)
{
   const uint32_t max_pipe_count = 32; /* A6xx */

   /* start from 1 tile per pipe */
   tiling->pipe0 = (VkExtent2D) {
      .width = 1,
      .height = 1,
   };
   tiling->pipe_count = tiling->tile_count;

   /* do not exceed max pipe count vertically */
   while (tiling->pipe_count.height > max_pipe_count) {
      tiling->pipe0.height += 2;
      tiling->pipe_count.height =
         (tiling->tile_count.height + tiling->pipe0.height - 1) /
         tiling->pipe0.height;
   }

   /* do not exceed max pipe count */
   while (tiling->pipe_count.width * tiling->pipe_count.height >
          max_pipe_count) {
      tiling->pipe0.width += 1;
      tiling->pipe_count.width =
         (tiling->tile_count.width + tiling->pipe0.width - 1) /
         tiling->pipe0.width;
   }
}
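/* Each visibility pipe covers a pipe0.width x pipe0.height rectangle of
 * tiles; the hardware provides at most 32 pipes, so the layout above grows
 * the per-pipe rectangle until the pipe grid fits.  Here every pipe's
 * position and size is packed into the VSC_PIPE_CONFIG register values.
 */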
static void
tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
                              const struct tu_device *dev)
{
   const uint32_t max_pipe_count = 32; /* A6xx */
   const uint32_t used_pipe_count =
      tiling->pipe_count.width * tiling->pipe_count.height;
   /* the last row/column of pipes may cover fewer tiles; a fully covered
    * last pipe must map to the full pipe size rather than zero
    */
   const VkExtent2D last_pipe = {
      .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
      .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
   };

   assert(used_pipe_count <= max_pipe_count);
   assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));

   for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
      for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
         const uint32_t pipe_x = tiling->pipe0.width * x;
         const uint32_t pipe_y = tiling->pipe0.height * y;
         const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
                                    ? last_pipe.width
                                    : tiling->pipe0.width;
         const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
                                    ? last_pipe.height
                                    : tiling->pipe0.height;
         const uint32_t n = tiling->pipe_count.width * y + x;

         tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
         tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
      }
   }

   memset(tiling->pipe_config + used_pipe_count, 0,
          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
}

static void
tu_tiling_config_update(struct tu_tiling_config *tiling,
                        const struct tu_device *dev,
                        const uint32_t *buffer_cpp,
                        uint32_t buffer_count,
                        const VkRect2D *render_area)
{
   /* see if there is any real change */
   const bool ra_changed =
      render_area &&
      memcmp(&tiling->render_area, render_area, sizeof(*render_area));
   const bool buf_changed = tiling->buffer_count != buffer_count ||
                            memcmp(tiling->buffer_cpp, buffer_cpp,
                                   sizeof(*buffer_cpp) * buffer_count);
   if (!ra_changed && !buf_changed)
      return;

   if (ra_changed)
      tiling->render_area = *render_area;

   if (buf_changed) {
      memcpy(tiling->buffer_cpp, buffer_cpp,
             sizeof(*buffer_cpp) * buffer_count);
      tiling->buffer_count = buffer_count;
   }

   tu_tiling_config_update_tile_layout(tiling, dev);
   tu_tiling_config_update_pipe_layout(tiling, dev);
   tu_tiling_config_update_pipes(tiling, dev);
}
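/* Map tile (tx, ty) back to its visibility pipe and its slot within that
 * pipe; the slot is the index into the visibility stream consumed via
 * CP_SET_BIN_DATA5 once hw binning is in use.  Also computes the tile's
 * screen-space extent, clamped to the render area on the last row/column.
 */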
static void
tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
                          const struct tu_device *dev,
                          uint32_t tx,
                          uint32_t ty,
                          struct tu_tile *tile)
{
   /* find the pipe and the slot for tile (tx, ty) */
   const uint32_t px = tx / tiling->pipe0.width;
   const uint32_t py = ty / tiling->pipe0.height;
   const uint32_t sx = tx - tiling->pipe0.width * px;
   const uint32_t sy = ty - tiling->pipe0.height * py;

   assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
   assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
   assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);

   /* convert to 1D indices */
   tile->pipe = tiling->pipe_count.width * py + px;
   tile->slot = tiling->pipe0.width * sy + sx;

   /* get the blit area for the tile */
   tile->begin = (VkOffset2D) {
      .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
      .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
   };
   tile->end.x =
      (tx == tiling->tile_count.width - 1)
         ? tiling->render_area.offset.x + tiling->render_area.extent.width
         : tile->begin.x + tiling->tile0.extent.width;
   tile->end.y =
      (ty == tiling->tile_count.height - 1)
         ? tiling->render_area.offset.y + tiling->render_area.extent.height
         : tile->begin.y + tiling->tile0.extent.height;
}

static enum a3xx_msaa_samples
tu6_msaa_samples(uint32_t samples)
{
   switch (samples) {
   case 1:
      return MSAA_ONE;
   case 2:
      return MSAA_TWO;
   case 4:
      return MSAA_FOUR;
   case 8:
      return MSAA_EIGHT;
   default:
      assert(!"invalid sample count");
      return MSAA_ONE;
   }
}

static enum a4xx_index_size
tu6_index_size(VkIndexType type)
{
   switch (type) {
   case VK_INDEX_TYPE_UINT16:
      return INDEX4_SIZE_16_BIT;
   case VK_INDEX_TYPE_UINT32:
      return INDEX4_SIZE_32_BIT;
   default:
      unreachable("invalid VkIndexType");
      return INDEX4_SIZE_8_BIT;
   }
}

static void
tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_write_reg(cs, cmd->marker_reg, ++cmd->marker_seqno);
}

void
tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     enum vgt_event_type event,
                     bool need_seqno)
{
   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1);
   tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event));
   if (need_seqno) {
      tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
      tu_cs_emit(cs, ++cmd->scratch_seqno);
   }
}

static void
tu6_emit_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu6_emit_event_write(cmd, cs, 0x31, false);
}

static void
tu6_emit_lrz_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu6_emit_event_write(cmd, cs, LRZ_FLUSH, false);
}

static void
tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   if (cmd->wait_for_idle) {
      tu_cs_emit_wfi(cs);
      cmd->wait_for_idle = false;
   }
}
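/* Emit the depth/stencil buffer state for the current subpass.  Only the
 * no-attachment case is handled so far: every depth/stencil register is
 * programmed to DEPTH6_NONE and zero bases.
 */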
static void
tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_subpass *subpass = cmd->state.subpass;

   const uint32_t a = subpass->depth_stencil_attachment.attachment;
   if (a == VK_ATTACHMENT_UNUSED) {
      tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
      tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */

      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      tu_cs_emit(cs,
                 A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));

      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
      tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */

      return;
   }

   /* enable zs? */
}

static void
tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   unsigned char mrt_comp[MAX_RTS] = { 0 };
   unsigned srgb_cntl = 0;

   uint32_t gmem_index = 0;
   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      uint32_t a = subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview = fb->attachments[a].attachment;
      const struct tu_image_level *slice =
         &iview->image->levels[iview->base_mip];
      const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
      uint32_t stride = 0;
      uint32_t offset = 0;

      mrt_comp[i] = 0xf;

      if (vk_format_is_srgb(iview->vk_format))
         srgb_cntl |= (1 << i);

      const struct tu_native_format *format =
         tu6_get_native_format(iview->vk_format);
      assert(format && format->rb >= 0);

      offset = slice->offset + slice->size * iview->base_layer;
      stride = slice->pitch * vk_format_get_blocksize(iview->vk_format);

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
      tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
                        A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
                        A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride));
      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size));
      tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset +
                           offset); /* BASE_LO/HI */
      tu_cs_emit(
         cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */

      tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
      tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb));

#if 0
      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4);
      tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
#endif
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
   tu_cs_emit(cs, srgb_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_SRGB_CNTL, 1);
   tu_cs_emit(cs, srgb_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_COMPONENTS, 1);
   tu_cs_emit(cs, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
                     A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
                     A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
                     A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
                     A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
                     A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
                     A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
                     A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1);
   tu_cs_emit(cs, A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7]));
}
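/* The sample count has to be programmed consistently into the rasterizer
 * (GRAS), render backend (RB) and shader/texture pipe (SP_TP) copies of
 * the MSAA controls, with the MSAA_DISABLE bit set when single-sampled.
 */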
static void
tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   const enum a3xx_msaa_samples samples =
      tu6_msaa_samples(subpass->max_sample_count);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   tu_cs_emit(
      cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
             ((samples == MSAA_ONE) ? A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE
                                    : 0));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   tu_cs_emit(
      cs,
      A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
         ((samples == MSAA_ONE) ? A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE : 0));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   tu_cs_emit(
      cs,
      A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
         ((samples == MSAA_ONE) ? A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE : 0));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1);
   tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}

static void
tu6_emit_bin_size(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t flags)
{
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   const uint32_t bin_w = tiling->tile0.extent.width;
   const uint32_t bin_h = tiling->tile0.extent.height;

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_BIN_CONTROL, 1);
   tu_cs_emit(cs, A6XX_GRAS_BIN_CONTROL_BINW(bin_w) |
                     A6XX_GRAS_BIN_CONTROL_BINH(bin_h) | flags);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL, 1);
   tu_cs_emit(cs, A6XX_RB_BIN_CONTROL_BINW(bin_w) |
                     A6XX_RB_BIN_CONTROL_BINH(bin_h) | flags);

   /* no flag for RB_BIN_CONTROL2... */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL2, 1);
   tu_cs_emit(cs, A6XX_RB_BIN_CONTROL2_BINW(bin_w) |
                     A6XX_RB_BIN_CONTROL2_BINH(bin_h));
}

static void
tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     bool binning)
{
   uint32_t cntl = 0;
   cntl |= A6XX_RB_RENDER_CNTL_UNK4;
   if (binning)
      cntl |= A6XX_RB_RENDER_CNTL_BINNING;

   tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
   tu_cs_emit(cs, 0x2);
   tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
   tu_cs_emit(cs, cntl);
}

static void
tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
   const uint32_t x1 = render_area->offset.x;
   const uint32_t y1 = render_area->offset.y;
   const uint32_t x2 = x1 + render_area->extent.width - 1;
   const uint32_t y2 = y1 + render_area->extent.height - 1;

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
   tu_cs_emit(cs,
              A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
   tu_cs_emit(cs,
              A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
}
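/* Program the RB_BLIT state for a gmem<->sysmem transfer of one
 * attachment.  Callers pass A6XX_RB_BLIT_INFO_GMEM in \a blit_info to load
 * the attachment from system memory into gmem, and 0 to resolve it back.
 */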
static void
tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
                   struct tu_cs *cs,
                   const struct tu_image_view *iview,
                   uint32_t gmem_offset,
                   uint32_t blit_info)
{
   const struct tu_image_level *slice =
      &iview->image->levels[iview->base_mip];
   const uint32_t offset = slice->offset + slice->size * iview->base_layer;
   const uint32_t stride =
      slice->pitch * vk_format_get_blocksize(iview->vk_format);
   const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
   const enum a3xx_msaa_samples samples = tu6_msaa_samples(1);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
   tu_cs_emit(cs, blit_info);

   /* tile mode? */
   const struct tu_native_format *format =
      tu6_get_native_format(iview->vk_format);
   assert(format && format->rb >= 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap));
   tu_cs_emit_qw(cs,
                 iview->image->bo->iova + iview->image->bo_offset + offset);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride));
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, gmem_offset);
}

static void
tu6_emit_blit_clear(struct tu_cmd_buffer *cmd,
                    struct tu_cs *cs,
                    const struct tu_image_view *iview,
                    uint32_t gmem_offset,
                    const VkClearValue *clear_value)
{
   const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
   const enum a3xx_msaa_samples samples = tu6_msaa_samples(1);

   const struct tu_native_format *format =
      tu6_get_native_format(iview->vk_format);
   assert(format && format->rb >= 0);
   /* must be WZYX; other values are ignored */
   const enum a3xx_color_swap swap = WZYX;

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, gmem_offset);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
   tu_cs_emit(cs, 0);

   /* pack clear_value into WZYX order */
   uint32_t clear_vals[4] = { 0 };
   tu_pack_clear_value(clear_value, iview->vk_format, clear_vals);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
   tu_cs_emit(cs, clear_vals[0]);
   tu_cs_emit(cs, clear_vals[1]);
   tu_cs_emit(cs, clear_vals[2]);
   tu_cs_emit(cs, clear_vals[3]);
}

static void
tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu6_emit_marker(cmd, cs);
   tu6_emit_event_write(cmd, cs, BLIT, false);
   tu6_emit_marker(cmd, cs);
}

static void
tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        uint32_t x1,
                        uint32_t y1,
                        uint32_t x2,
                        uint32_t y2)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
   tu_cs_emit(
      cs, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | A6XX_GRAS_RESOLVE_CNTL_1_Y(y1));
   tu_cs_emit(
      cs, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | A6XX_GRAS_RESOLVE_CNTL_2_Y(y2));
}

static void
tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
                       struct tu_cs *cs,
                       uint32_t x1,
                       uint32_t y1)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
   tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   tu_cs_emit(cs,
              A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_WINDOW_OFFSET, 1);
   tu_cs_emit(cs, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   tu_cs_emit(
      cs, A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}
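/* Restrict rendering to one tile: set the GMEM marker, program the window
 * scissor to the tile's extent and the window offset to its origin, and,
 * since hw binning is not implemented yet, force all bins visible via
 * CP_SET_VISIBILITY_OVERRIDE.
 */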
static void
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     const struct tu_tile *tile)
{
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(0x7));

   tu6_emit_marker(cmd, cs);
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
   tu6_emit_marker(cmd, cs);

   const uint32_t x1 = tile->begin.x;
   const uint32_t y1 = tile->begin.y;
   const uint32_t x2 = tile->end.x - 1;
   const uint32_t y2 = tile->end.y - 1;
   tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
   tu6_emit_window_offset(cmd, cs, x1, y1);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_OVERRIDE, 1);
   tu_cs_emit(cs, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);

   if (false) {
      /* hw binning? */
   } else {
      tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
      tu_cs_emit(cs, 0x1);

      tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
      tu_cs_emit(cs, 0x0);
   }
}

static void
tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   const struct tu_attachment_state *attachments = cmd->state.attachments;

   tu6_emit_blit_scissor(cmd, cs);

   uint32_t gmem_index = 0;
   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      const uint32_t a = subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview = fb->attachments[a].attachment;
      const struct tu_attachment_state *att = attachments + a;
      if (att->pending_clear_aspects) {
         assert(att->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
         tu6_emit_blit_clear(cmd, cs, iview,
                             tiling->gmem_offsets[gmem_index++],
                             &att->clear_value);
      } else {
         tu6_emit_blit_info(cmd, cs, iview,
                            tiling->gmem_offsets[gmem_index++],
                            A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM);
      }

      tu6_emit_blit(cmd, cs);
   }

   /* load/clear zs? */
}
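/* Resolve pass for one tile: switch the marker to RM6_RESOLVE, then blit
 * every color attachment from its gmem offset back to system memory.
 */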
static void
tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;

   if (false) {
      /* hw binning? */
   }

   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   tu_cs_emit(cs, 0x0);

   tu6_emit_marker(cmd, cs);
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
   tu6_emit_marker(cmd, cs);

   tu6_emit_blit_scissor(cmd, cs);

   uint32_t gmem_index = 0;
   for (uint32_t i = 0; i < cmd->state.subpass->color_count; ++i) {
      uint32_t a = cmd->state.subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview = fb->attachments[a].attachment;
      tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[gmem_index++],
                         0);
      tu6_emit_blit(cmd, cs);
   }
}

static void
tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1);
   tu_cs_emit(cs, restart_index);
}
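/* Static state emitted once at the start of every primary command buffer.
 * The values, many of them writes to still-unnamed UNKNOWN registers,
 * largely follow the setup used by the fd6 gallium driver.
 */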
static void
tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   tu6_emit_cache_flush(cmd, cs);

   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);

   tu_cs_emit_write_reg(cs, REG_A6XX_RB_CCU_CNTL, 0x7c400004);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE0F, 0x3f);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B605, 0x44);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B600, 0x100000);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9600, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE04, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE03, 0x00000410);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BB11, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
   tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A009, 0x00000001);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);

   tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881B, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9101, 0xffff00);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, 1);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9300, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_SO_OVERRIDE,
                        A6XX_VPC_SO_OVERRIDE_SO_DISABLE);

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9801, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_809B, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9602, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);

   tu6_emit_marker(cmd, cs);

   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_MODE_CNTL, 0x00000000);

   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x0000001f);

   /* we don't use this yet.. probably best to disable.. */
   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3);
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2);
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUF_CNTL, 1);
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUF_CNTL */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1);
   tu_cs_emit(cs, 0x00000000); /* UNKNOWN_E2AB */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CTRL_REG0, 1);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CTRL_REG0, 1);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_LRZ_CNTL, 1);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_sanity_check(cs);
}
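/* Per-render-pass preamble: flush LRZ and caches, reprogram the CCU for
 * GMEM rendering, then emit the subpass depth/stencil, MRT, MSAA and bin
 * size state that the tile loop below depends on.
 */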
static void
tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   tu6_emit_lrz_flush(cmd, cs);

   /* lrz clear? */

   tu6_emit_cache_flush(cmd, cs);

   tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   tu_cs_emit(cs, 0x0);

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   tu6_emit_wfi(cmd, cs);
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
   tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */

   tu6_emit_zs(cmd, cs);
   tu6_emit_mrt(cmd, cs);
   tu6_emit_msaa(cmd, cs);

   if (false) {
      /* hw binning? */
   } else {
      tu6_emit_bin_size(cmd, cs, 0x6000000);
      /* no draws */
   }

   tu6_emit_render_cntl(cmd, cs, false);

   tu_cs_sanity_check(cs);
}

static void
tu6_render_tile(struct tu_cmd_buffer *cmd,
                struct tu_cs *cs,
                const struct tu_tile *tile)
{
   const uint32_t render_tile_space = 64 + tu_cs_get_call_size(&cmd->draw_cs);
   VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   tu6_emit_tile_select(cmd, cs, tile);
   tu_cs_emit_ib(cs, &cmd->state.tile_load_ib);

   tu_cs_emit_call(cs, &cmd->draw_cs);
   cmd->wait_for_idle = true;

   tu_cs_emit_ib(cs, &cmd->state.tile_store_ib);

   tu_cs_sanity_check(cs);
}

static void
tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
   tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);

   tu6_emit_lrz_flush(cmd, cs);

   tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);

   tu_cs_sanity_check(cs);
}

static void
tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
{
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;

   tu6_render_begin(cmd, &cmd->cs);

   for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
      for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
         struct tu_tile tile;
         tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
         tu6_render_tile(cmd, &cmd->cs, &tile);
      }
   }

   tu6_render_end(cmd, &cmd->cs);
}

static void
tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd)
{
   const uint32_t tile_load_space = 16 + 32 * MAX_RTS;
   const struct tu_subpass *subpass = cmd->state.subpass;
   struct tu_attachment_state *attachments = cmd->state.attachments;
   struct tu_cs sub_cs;

   VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
                                            tile_load_space, &sub_cs);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   /* emit to tile-load sub_cs */
   tu6_emit_tile_load(cmd, &sub_cs);

   cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      const uint32_t a = subpass->color_attachments[i].attachment;
      if (a != VK_ATTACHMENT_UNUSED)
         attachments[a].pending_clear_aspects = 0;
   }
}

static void
tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
{
   const uint32_t tile_store_space = 32 + 32 * MAX_RTS;
   struct tu_cs sub_cs;

   VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
                                            tile_store_space, &sub_cs);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   /* emit to tile-store sub_cs */
   tu6_emit_tile_store(cmd, &sub_cs);

   cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
}
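/* Derive the per-pixel gmem cost of each attachment (format block size
 * times sample count) for the current subpass so the tiling config can
 * size tiles to fit the available gmem.
 */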
static void
tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
                            const VkRect2D *render_area)
{
   const struct tu_device *dev = cmd->device;
   const struct tu_render_pass *pass = cmd->state.pass;
   const struct tu_subpass *subpass = cmd->state.subpass;
   struct tu_tiling_config *tiling = &cmd->state.tiling_config;

   uint32_t buffer_cpp[MAX_RTS + 2];
   uint32_t buffer_count = 0;

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      const uint32_t a = subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_render_pass_attachment *att = &pass->attachments[a];
      buffer_cpp[buffer_count++] =
         vk_format_get_blocksize(att->format) * att->samples;
   }

   if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
      const uint32_t a = subpass->depth_stencil_attachment.attachment;
      const struct tu_render_pass_attachment *att = &pass->attachments[a];

      /* TODO */
      assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT);

      buffer_cpp[buffer_count++] =
         vk_format_get_blocksize(att->format) * att->samples;
   }

   tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count,
                           render_area);
}

const struct tu_dynamic_state default_dynamic_state = {
   .viewport =
      {
         .count = 0,
      },
   .scissor =
      {
         .count = 0,
      },
   .line_width = 1.0f,
   .depth_bias =
      {
         .bias = 0.0f,
         .clamp = 0.0f,
         .slope = 0.0f,
      },
   .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
   .depth_bounds =
      {
         .min = 0.0f,
         .max = 1.0f,
      },
   .stencil_compare_mask =
      {
         .front = ~0u,
         .back = ~0u,
      },
   .stencil_write_mask =
      {
         .front = ~0u,
         .back = ~0u,
      },
   .stencil_reference =
      {
         .front = 0u,
         .back = 0u,
      },
};
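/* Copy the pipeline's dynamic state into the command buffer state, setting
 * dirty bits only for the values that actually changed so redundant
 * re-emission is avoided.
 */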
static void UNUSED /* FINISHME */
tu_bind_dynamic_state(struct tu_cmd_buffer *cmd_buffer,
                      const struct tu_dynamic_state *src)
{
   struct tu_dynamic_state *dest = &cmd_buffer->state.dynamic;
   uint32_t copy_mask = src->mask;
   uint32_t dest_mask = 0;

   tu_use_args(cmd_buffer); /* FINISHME */

   /* Make sure to copy the number of viewports/scissors because they can
    * only be specified at pipeline creation time.
    */
   dest->viewport.count = src->viewport.count;
   dest->scissor.count = src->scissor.count;
   dest->discard_rectangle.count = src->discard_rectangle.count;

   if (copy_mask & TU_DYNAMIC_VIEWPORT) {
      if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
                 src->viewport.count * sizeof(VkViewport))) {
         typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
                      src->viewport.count);
         dest_mask |= TU_DYNAMIC_VIEWPORT;
      }
   }

   if (copy_mask & TU_DYNAMIC_SCISSOR) {
      if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
                 src->scissor.count * sizeof(VkRect2D))) {
         typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
                      src->scissor.count);
         dest_mask |= TU_DYNAMIC_SCISSOR;
      }
   }

   if (copy_mask & TU_DYNAMIC_LINE_WIDTH) {
      if (dest->line_width != src->line_width) {
         dest->line_width = src->line_width;
         dest_mask |= TU_DYNAMIC_LINE_WIDTH;
      }
   }

   if (copy_mask & TU_DYNAMIC_DEPTH_BIAS) {
      if (memcmp(&dest->depth_bias, &src->depth_bias,
                 sizeof(src->depth_bias))) {
         dest->depth_bias = src->depth_bias;
         dest_mask |= TU_DYNAMIC_DEPTH_BIAS;
      }
   }

   if (copy_mask & TU_DYNAMIC_BLEND_CONSTANTS) {
      if (memcmp(&dest->blend_constants, &src->blend_constants,
                 sizeof(src->blend_constants))) {
         typed_memcpy(dest->blend_constants, src->blend_constants, 4);
         dest_mask |= TU_DYNAMIC_BLEND_CONSTANTS;
      }
   }

   if (copy_mask & TU_DYNAMIC_DEPTH_BOUNDS) {
      if (memcmp(&dest->depth_bounds, &src->depth_bounds,
                 sizeof(src->depth_bounds))) {
         dest->depth_bounds = src->depth_bounds;
         dest_mask |= TU_DYNAMIC_DEPTH_BOUNDS;
      }
   }

   if (copy_mask & TU_DYNAMIC_STENCIL_COMPARE_MASK) {
      if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
                 sizeof(src->stencil_compare_mask))) {
         dest->stencil_compare_mask = src->stencil_compare_mask;
         dest_mask |= TU_DYNAMIC_STENCIL_COMPARE_MASK;
      }
   }

   if (copy_mask & TU_DYNAMIC_STENCIL_WRITE_MASK) {
      if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
                 sizeof(src->stencil_write_mask))) {
         dest->stencil_write_mask = src->stencil_write_mask;
         dest_mask |= TU_DYNAMIC_STENCIL_WRITE_MASK;
      }
   }

   if (copy_mask & TU_DYNAMIC_STENCIL_REFERENCE) {
      if (memcmp(&dest->stencil_reference, &src->stencil_reference,
                 sizeof(src->stencil_reference))) {
         dest->stencil_reference = src->stencil_reference;
         dest_mask |= TU_DYNAMIC_STENCIL_REFERENCE;
      }
   }

   if (copy_mask & TU_DYNAMIC_DISCARD_RECTANGLE) {
      if (memcmp(&dest->discard_rectangle.rectangles,
                 &src->discard_rectangle.rectangles,
                 src->discard_rectangle.count * sizeof(VkRect2D))) {
         typed_memcpy(dest->discard_rectangle.rectangles,
                      src->discard_rectangle.rectangles,
                      src->discard_rectangle.count);
         dest_mask |= TU_DYNAMIC_DISCARD_RECTANGLE;
      }
   }
}
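/* Each command buffer owns three command streams: cs holds the top-level
 * (IB1) commands, draw_cs accumulates the render-pass draw commands that
 * are replayed once per tile, and tile_cs provides sub-streams for the
 * per-pass tile load/store IBs.
 */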
static VkResult
tu_create_cmd_buffer(struct tu_device *device,
                     struct tu_cmd_pool *pool,
                     VkCommandBufferLevel level,
                     VkCommandBuffer *pCommandBuffer)
{
   struct tu_cmd_buffer *cmd_buffer;
   cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
   cmd_buffer->device = device;
   cmd_buffer->pool = pool;
   cmd_buffer->level = level;

   if (pool) {
      list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
      cmd_buffer->queue_family_index = pool->queue_family_index;

   } else {
      /* Init the pool_link so we can safely call list_del when we destroy
       * the command buffer
       */
      list_inithead(&cmd_buffer->pool_link);
      cmd_buffer->queue_family_index = TU_QUEUE_GENERAL;
   }

   tu_bo_list_init(&cmd_buffer->bo_list);
   tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096);
   tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096);
   tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024);

   *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer);

   list_inithead(&cmd_buffer->upload.list);

   cmd_buffer->marker_reg = REG_A6XX_CP_SCRATCH_REG(
      cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ? 7 : 6);

   VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static void
tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
{
   tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);

   list_del(&cmd_buffer->pool_link);

   for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++)
      free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);

   tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs);
   tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs);
   tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs);

   tu_bo_list_destroy(&cmd_buffer->bo_list);
   vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
}

static VkResult
tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
{
   cmd_buffer->wait_for_idle = true;

   cmd_buffer->record_result = VK_SUCCESS;

   tu_bo_list_reset(&cmd_buffer->bo_list);
   tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs);
   tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs);
   tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs);

   for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
      cmd_buffer->descriptors[i].dirty = 0;
      cmd_buffer->descriptors[i].valid = 0;
      cmd_buffer->descriptors[i].push_dirty = false;
   }

   cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;

   return cmd_buffer->record_result;
}
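/* Register every framebuffer attachment BO with the submit list and record
 * which aspects still need clearing, based on the attachment load ops and
 * the clear values passed to vkCmdBeginRenderPass.
 */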
static VkResult
tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer,
                               const VkRenderPassBeginInfo *info)
{
   struct tu_cmd_state *state = &cmd_buffer->state;
   const struct tu_framebuffer *fb = state->framebuffer;
   const struct tu_render_pass *pass = state->pass;

   for (uint32_t i = 0; i < fb->attachment_count; ++i) {
      const struct tu_image_view *iview = fb->attachments[i].attachment;
      tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo,
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
   }

   if (pass->attachment_count == 0) {
      state->attachments = NULL;
      return VK_SUCCESS;
   }

   state->attachments =
      vk_alloc(&cmd_buffer->pool->alloc,
               pass->attachment_count * sizeof(state->attachments[0]), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (state->attachments == NULL) {
      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
      return cmd_buffer->record_result;
   }

   for (uint32_t i = 0; i < pass->attachment_count; ++i) {
      const struct tu_render_pass_attachment *att = &pass->attachments[i];
      VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
      VkImageAspectFlags clear_aspects = 0;

      if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* color attachment */
         if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
         }
      } else {
         /* depthstencil attachment */
         if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
             att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
            if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
                att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
               clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         }
         if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
             att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         }
      }

      state->attachments[i].pending_clear_aspects = clear_aspects;
      state->attachments[i].cleared_views = 0;
      if (clear_aspects && info) {
         assert(info->clearValueCount > i);
         state->attachments[i].clear_value = info->pClearValues[i];
      }

      state->attachments[i].current_layout = att->initial_layout;
   }

   return VK_SUCCESS;
}

VkResult
tu_AllocateCommandBuffers(VkDevice _device,
                          const VkCommandBufferAllocateInfo *pAllocateInfo,
                          VkCommandBuffer *pCommandBuffers)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {

      if (!list_empty(&pool->free_cmd_buffers)) {
         struct tu_cmd_buffer *cmd_buffer = list_first_entry(
            &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link);

         list_del(&cmd_buffer->pool_link);
         list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);

         result = tu_reset_cmd_buffer(cmd_buffer);
         cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
         cmd_buffer->level = pAllocateInfo->level;

         pCommandBuffers[i] = tu_cmd_buffer_to_handle(cmd_buffer);
      } else {
         result = tu_create_cmd_buffer(device, pool, pAllocateInfo->level,
                                       &pCommandBuffers[i]);
      }
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      tu_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i,
                            pCommandBuffers);

      /* From the Vulkan 1.0.66 spec:
       *
       *    "vkAllocateCommandBuffers can be used to create multiple
       *    command buffers. If the creation of any of those command
       *    buffers fails, the implementation must destroy all
       *    successfully created command buffer objects from this
       *    command, set all entries of the pCommandBuffers array to
       *    NULL and return the error."
       */
      memset(pCommandBuffers, 0,
             sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
   }

   return result;
}
void
tu_FreeCommandBuffers(VkDevice device,
                      VkCommandPool commandPool,
                      uint32_t commandBufferCount,
                      const VkCommandBuffer *pCommandBuffers)
{
   for (uint32_t i = 0; i < commandBufferCount; i++) {
      TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, pCommandBuffers[i]);

      if (cmd_buffer) {
         if (cmd_buffer->pool) {
            list_del(&cmd_buffer->pool_link);
            list_addtail(&cmd_buffer->pool_link,
                         &cmd_buffer->pool->free_cmd_buffers);
         } else
            tu_cmd_buffer_destroy(cmd_buffer);
      }
   }
}

VkResult
tu_ResetCommandBuffer(VkCommandBuffer commandBuffer,
                      VkCommandBufferResetFlags flags)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   return tu_reset_cmd_buffer(cmd_buffer);
}

VkResult
tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                      const VkCommandBufferBeginInfo *pBeginInfo)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   VkResult result = VK_SUCCESS;

   if (cmd_buffer->status != TU_CMD_BUFFER_STATUS_INITIAL) {
      /* If the command buffer has already been reset with
       * vkResetCommandBuffer, no need to do it again.
       */
      result = tu_reset_cmd_buffer(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }

   memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
   cmd_buffer->usage_flags = pBeginInfo->flags;

   tu_cs_begin(&cmd_buffer->cs);

   cmd_buffer->marker_seqno = 0;
   cmd_buffer->scratch_seqno = 0;

   /* setup initial configuration into command buffer */
   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      switch (cmd_buffer->queue_family_index) {
      case TU_QUEUE_GENERAL:
         tu6_init_hw(cmd_buffer, &cmd_buffer->cs);
         break;
      default:
         break;
      }
   }

   cmd_buffer->status = TU_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}

void
tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
                        uint32_t firstBinding,
                        uint32_t bindingCount,
                        const VkBuffer *pBuffers,
                        const VkDeviceSize *pOffsets)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   assert(firstBinding + bindingCount <= MAX_VBS);

   for (uint32_t i = 0; i < bindingCount; i++) {
      cmd->state.vb.buffers[firstBinding + i] =
         tu_buffer_from_handle(pBuffers[i]);
      cmd->state.vb.offsets[firstBinding + i] = pOffsets[i];
   }

   /* VB states depend on VkPipelineVertexInputStateCreateInfo */
   cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
}
void
tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
                      VkBuffer buffer,
                      VkDeviceSize offset,
                      VkIndexType indexType)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buf, buffer);

   /* initialize/update the restart index */
   if (!cmd->state.index_buffer || cmd->state.index_type != indexType) {
      struct tu_cs *draw_cs = &cmd->draw_cs;
      VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2);
      if (result != VK_SUCCESS) {
         cmd->record_result = result;
         return;
      }

      tu6_emit_restart_index(
         draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 0xffffffff : 0xffff);

      tu_cs_sanity_check(draw_cs);
   }

   /* track the BO */
   if (cmd->state.index_buffer != buf)
      tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);

   cmd->state.index_buffer = buf;
   cmd->state.index_offset = offset;
   cmd->state.index_type = indexType;
}

void
tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                         VkPipelineBindPoint pipelineBindPoint,
                         VkPipelineLayout _layout,
                         uint32_t firstSet,
                         uint32_t descriptorSetCount,
                         const VkDescriptorSet *pDescriptorSets,
                         uint32_t dynamicOffsetCount,
                         const uint32_t *pDynamicOffsets)
{
}

void
tu_CmdPushConstants(VkCommandBuffer commandBuffer,
                    VkPipelineLayout layout,
                    VkShaderStageFlags stageFlags,
                    uint32_t offset,
                    uint32_t size,
                    const void *pValues)
{
}

VkResult
tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);

   if (cmd_buffer->scratch_seqno) {
      tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo,
                     MSM_SUBMIT_BO_WRITE);
   }

   for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) {
      tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i],
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
   }

   for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) {
      tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i],
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
   }

   tu_cs_end(&cmd_buffer->cs);

   assert(!cmd_buffer->state.attachments);

   cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE;

   return cmd_buffer->record_result;
}

void
tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
                   VkPipelineBindPoint pipelineBindPoint,
                   VkPipeline _pipeline)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      cmd->state.pipeline = pipeline;
      cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE;
      break;
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      tu_finishme("binding compute pipeline");
      break;
   default:
      unreachable("unrecognized pipeline bind point");
      break;
   }
}

void
tu_CmdSetViewport(VkCommandBuffer commandBuffer,
                  uint32_t firstViewport,
                  uint32_t viewportCount,
                  const VkViewport *pViewports)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *draw_cs = &cmd->draw_cs;

   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   assert(firstViewport == 0 && viewportCount == 1);
   tu6_emit_viewport(draw_cs, pViewports);

   tu_cs_sanity_check(draw_cs);
}

void
tu_CmdSetScissor(VkCommandBuffer commandBuffer,
                 uint32_t firstScissor,
                 uint32_t scissorCount,
                 const VkRect2D *pScissors)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *draw_cs = &cmd->draw_cs;

   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   assert(firstScissor == 0 && scissorCount == 1);
   tu6_emit_scissor(draw_cs, pScissors);

   tu_cs_sanity_check(draw_cs);
}
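/* Dynamic state that maps to standalone registers (viewport, scissor,
 * depth bias, blend constants) is emitted straight into draw_cs; state
 * that shares registers with pipeline state, like the setters below, only
 * records the value and a dirty bit here and is emitted at draw time.
 */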
void
tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   cmd->state.dynamic.line_width = lineWidth;

   /* line width depends on VkPipelineRasterizationStateCreateInfo */
   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
}

void
tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
                   float depthBiasConstantFactor,
                   float depthBiasClamp,
                   float depthBiasSlopeFactor)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *draw_cs = &cmd->draw_cs;

   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp,
                       depthBiasSlopeFactor);

   tu_cs_sanity_check(draw_cs);
}

void
tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
                        const float blendConstants[4])
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *draw_cs = &cmd->draw_cs;

   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   tu6_emit_blend_constants(draw_cs, blendConstants);

   tu_cs_sanity_check(draw_cs);
}

void
tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
                     float minDepthBounds,
                     float maxDepthBounds)
{
   /* TODO: not implemented yet */
}

void
tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
                            VkStencilFaceFlags faceMask,
                            uint32_t compareMask)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      cmd->state.dynamic.stencil_compare_mask.front = compareMask;
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      cmd->state.dynamic.stencil_compare_mask.back = compareMask;

   /* the front/back compare masks must be updated together */
   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
}

void
tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
                          VkStencilFaceFlags faceMask,
                          uint32_t writeMask)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      cmd->state.dynamic.stencil_write_mask.front = writeMask;
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      cmd->state.dynamic.stencil_write_mask.back = writeMask;

   /* the front/back write masks must be updated together */
   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
}

void
tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
                          VkStencilFaceFlags faceMask,
                          uint32_t reference)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      cmd->state.dynamic.stencil_reference.front = reference;
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      cmd->state.dynamic.stencil_reference.back = reference;

   /* the front/back references must be updated together */
   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
}

void
tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                      uint32_t commandBufferCount,
                      const VkCommandBuffer *pCmdBuffers)
{
   /* TODO: not implemented yet */
}
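/*
 * Command pools keep two lists: cmd_buffers holds the live command buffers
 * and free_cmd_buffers holds recycled ones. tu_FreeCommandBuffers() above
 * moves pooled buffers onto the free list instead of destroying them, and
 * tu_TrimCommandPool() is what finally releases that cache.
 */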
VkResult
tu_CreateCommandPool(VkDevice _device,
                     const VkCommandPoolCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkCommandPool *pCmdPool)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_cmd_pool *pool;

   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pool == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      pool->alloc = *pAllocator;
   else
      pool->alloc = device->alloc;

   list_inithead(&pool->cmd_buffers);
   list_inithead(&pool->free_cmd_buffers);

   pool->queue_family_index = pCreateInfo->queueFamilyIndex;

   *pCmdPool = tu_cmd_pool_to_handle(pool);

   return VK_SUCCESS;
}

void
tu_DestroyCommandPool(VkDevice _device,
                      VkCommandPool commandPool,
                      const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link)
   {
      tu_cmd_buffer_destroy(cmd_buffer);
   }

   list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
                            &pool->free_cmd_buffers, pool_link)
   {
      tu_cmd_buffer_destroy(cmd_buffer);
   }

   vk_free2(&device->alloc, pAllocator, pool);
}

VkResult
tu_ResetCommandPool(VkDevice device,
                    VkCommandPool commandPool,
                    VkCommandPoolResetFlags flags)
{
   TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
   VkResult result;

   list_for_each_entry(struct tu_cmd_buffer, cmd_buffer, &pool->cmd_buffers,
                       pool_link)
   {
      result = tu_reset_cmd_buffer(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

void
tu_TrimCommandPool(VkDevice device,
                   VkCommandPool commandPool,
                   VkCommandPoolTrimFlags flags)
{
   TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
                            &pool->free_cmd_buffers, pool_link)
   {
      tu_cmd_buffer_destroy(cmd_buffer);
   }
}

void
tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
                      const VkRenderPassBeginInfo *pRenderPassBegin,
                      VkSubpassContents contents)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass);
   TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
   VkResult result;

   cmd_buffer->state.pass = pass;
   cmd_buffer->state.subpass = pass->subpasses;
   cmd_buffer->state.framebuffer = framebuffer;

   result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);
   if (result != VK_SUCCESS)
      return;

   tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea);
   tu_cmd_prepare_tile_load_ib(cmd_buffer);
   tu_cmd_prepare_tile_store_ib(cmd_buffer);

   /* draw_cs should contain entries only for this render pass */
   assert(!cmd_buffer->draw_cs.entry_count);
   tu_cs_begin(&cmd_buffer->draw_cs);
}

void
tu_CmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
                          const VkRenderPassBeginInfo *pRenderPassBeginInfo,
                          const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
{
   tu_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo,
                         pSubpassBeginInfo->contents);
}
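/*
 * At each subpass boundary the draws recorded so far in draw_cs are flushed
 * out by tu_cmd_render_tiles(), which executes them for each tile between
 * the tile load and tile store IBs prepared above. The tiling config and
 * the load/store IBs then have to be rebuilt for the next subpass.
 */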
void
tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   tu_cmd_render_tiles(cmd);

   cmd->state.subpass++;

   tu_cmd_update_tiling_config(cmd, NULL);
   tu_cmd_prepare_tile_load_ib(cmd);
   tu_cmd_prepare_tile_store_ib(cmd);
}

void
tu_CmdNextSubpass2KHR(VkCommandBuffer commandBuffer,
                      const VkSubpassBeginInfoKHR *pSubpassBeginInfo,
                      const VkSubpassEndInfoKHR *pSubpassEndInfo)
{
   tu_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents);
}

struct tu_draw_info
{
   /**
    * Number of vertices.
    */
   uint32_t count;

   /**
    * Index of the first vertex.
    */
   int32_t vertex_offset;

   /**
    * First instance id.
    */
   uint32_t first_instance;

   /**
    * Number of instances.
    */
   uint32_t instance_count;

   /**
    * First index (indexed draws only).
    */
   uint32_t first_index;

   /**
    * Whether it's an indexed draw.
    */
   bool indexed;

   /**
    * Indirect draw parameters resource.
    */
   struct tu_buffer *indirect;
   uint64_t indirect_offset;
   uint32_t stride;

   /**
    * Draw count parameters resource.
    */
   struct tu_buffer *count_buffer;
   uint64_t count_buffer_offset;
};

enum tu_draw_state_group_id
{
   TU_DRAW_STATE_PROGRAM,
   TU_DRAW_STATE_PROGRAM_BINNING,
   TU_DRAW_STATE_VI,
   TU_DRAW_STATE_VI_BINNING,
   TU_DRAW_STATE_VP,
   TU_DRAW_STATE_RAST,
   TU_DRAW_STATE_DS,
   TU_DRAW_STATE_BLEND,

   TU_DRAW_STATE_COUNT,
};

struct tu_draw_state_group
{
   enum tu_draw_state_group_id id;
   uint32_t enable_mask;
   const struct tu_cs_entry *ib;
};
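/*
 * Draw state groups are emitted with CP_SET_DRAW_STATE, which lets the CP
 * enable each group selectively per pass: the low bits of enable_mask appear
 * to select the binning and rendering passes, hence 0x1 for binning-only
 * state, 0x6 for render-only state and 0x7 for state shared by both.
 */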
static void
tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     const struct tu_draw_info *draw)
{
   const struct tu_pipeline *pipeline = cmd->state.pipeline;
   const struct tu_dynamic_state *dynamic = &cmd->state.dynamic;
   struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT];
   uint32_t draw_state_group_count = 0;

   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   /* TODO lrz */

   uint32_t pc_primitive_cntl = 0;
   if (pipeline->ia.primitive_restart && draw->indexed)
      pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART;

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1);
   tu_cs_emit(cs, pc_primitive_cntl);

   if (cmd->state.dirty &
          (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) {
      tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl,
                            dynamic->line_width);
   }

   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
      tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front,
                                    dynamic->stencil_compare_mask.back);
   }

   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
      tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front,
                                  dynamic->stencil_write_mask.back);
   }

   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
      tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front,
                                 dynamic->stencil_reference.back);
   }

   if (cmd->state.dirty &
       (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) {
      for (uint32_t i = 0; i < pipeline->vi.count; i++) {
         const uint32_t binding = pipeline->vi.bindings[i];
         const uint32_t stride = pipeline->vi.strides[i];
         const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
         const VkDeviceSize offset = buf->bo_offset +
                                     cmd->state.vb.offsets[binding] +
                                     pipeline->vi.offsets[i];
         const VkDeviceSize size =
            offset < buf->bo->size ? buf->bo->size - offset : 0;

         tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4);
         tu_cs_emit_qw(cs, buf->bo->iova + offset);
         tu_cs_emit(cs, size);
         tu_cs_emit(cs, stride);
      }
   }

   /* TODO shader consts */

   if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_PROGRAM,
            .enable_mask = 0x6,
            .ib = &pipeline->program.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_PROGRAM_BINNING,
            .enable_mask = 0x1,
            .ib = &pipeline->program.binning_state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_VI,
            .enable_mask = 0x6,
            .ib = &pipeline->vi.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_VI_BINNING,
            .enable_mask = 0x1,
            .ib = &pipeline->vi.binning_state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_VP,
            .enable_mask = 0x7,
            .ib = &pipeline->vp.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_RAST,
            .enable_mask = 0x7,
            .ib = &pipeline->rast.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_DS,
            .enable_mask = 0x7,
            .ib = &pipeline->ds.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_BLEND,
            .enable_mask = 0x7,
            .ib = &pipeline->blend.state_ib,
         };
   }

   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count);
   for (uint32_t i = 0; i < draw_state_group_count; i++) {
      const struct tu_draw_state_group *group = &draw_state_groups[i];

      uint32_t cp_set_draw_state =
         CP_SET_DRAW_STATE__0_COUNT(group->ib->size / 4) |
         CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) |
         CP_SET_DRAW_STATE__0_GROUP_ID(group->id);
      uint64_t iova;
      if (group->ib->size) {
         iova = group->ib->bo->iova + group->ib->offset;
      } else {
         cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE;
         iova = 0;
      }

      tu_cs_emit(cs, cp_set_draw_state);
      tu_cs_emit_qw(cs, iova);
   }

   tu_cs_sanity_check(cs);
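   /*
    * Everything the GPU will dereference during this draw has to appear in
    * the submit's BO table, or the kernel will not map and fence it;
    * tu_bo_list_add() merges duplicate handles, so re-adding a BO on every
    * draw is cheap.
    */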
   /* track BOs */
   if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
      tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo,
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
      for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) {
         tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i],
                        MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
      }
   }
   if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) {
      for (uint32_t i = 0; i < MAX_VBS; i++) {
         const struct tu_buffer *buf = cmd->state.vb.buffers[i];
         if (buf)
            tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
      }
   }

   cmd->state.dirty = 0;
}

static void
tu6_emit_draw_direct(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     const struct tu_draw_info *draw)
{
   const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype;

   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2);
   tu_cs_emit(cs, draw->vertex_offset);
   tu_cs_emit(cs, draw->first_instance);

   /* TODO hw binning */
   if (draw->indexed) {
      const enum a4xx_index_size index_size =
         tu6_index_size(cmd->state.index_type);
      const uint32_t index_bytes =
         (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 4 : 2;
      const struct tu_buffer *buf = cmd->state.index_buffer;
      const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset +
                                  index_bytes * draw->first_index;
      const uint32_t size = index_bytes * draw->count;

      const uint32_t cp_draw_indx =
         CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
         CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
         CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
         CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000;

      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
      tu_cs_emit(cs, cp_draw_indx);
      tu_cs_emit(cs, draw->instance_count);
      tu_cs_emit(cs, draw->count);
      tu_cs_emit(cs, 0x0); /* XXX */
      tu_cs_emit_qw(cs, buf->bo->iova + offset);
      tu_cs_emit(cs, size);
   } else {
      const uint32_t cp_draw_indx =
         CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
         CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
         CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000;

      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
      tu_cs_emit(cs, cp_draw_indx);
      tu_cs_emit(cs, draw->instance_count);
      tu_cs_emit(cs, draw->count);
   }
}

static void
tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw)
{
   struct tu_cs *cs = &cmd->draw_cs;

   tu6_bind_draw_states(cmd, cs, draw);

   VkResult result = tu_cs_reserve_space(cmd->device, cs, 32);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   if (draw->indirect) {
      tu_finishme("indirect draw");
      return;
   }

   /* TODO tu6_emit_marker should pick different regs depending on cs */
   tu6_emit_marker(cmd, cs);
   tu6_emit_draw_direct(cmd, cs, draw);
   tu6_emit_marker(cmd, cs);

   cmd->wait_for_idle = true;

   tu_cs_sanity_check(cs);
}
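/*
 * All vkCmdDraw* entry points below just translate their parameters into a
 * tu_draw_info and funnel through tu_draw().
 */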
void
tu_CmdDraw(VkCommandBuffer commandBuffer,
           uint32_t vertexCount,
           uint32_t instanceCount,
           uint32_t firstVertex,
           uint32_t firstInstance)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   struct tu_draw_info info = {};

   info.count = vertexCount;
   info.instance_count = instanceCount;
   info.first_instance = firstInstance;
   info.vertex_offset = firstVertex;

   tu_draw(cmd_buffer, &info);
}

void
tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
                  uint32_t indexCount,
                  uint32_t instanceCount,
                  uint32_t firstIndex,
                  int32_t vertexOffset,
                  uint32_t firstInstance)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   struct tu_draw_info info = {};

   info.indexed = true;
   info.count = indexCount;
   info.instance_count = instanceCount;
   info.first_index = firstIndex;
   info.vertex_offset = vertexOffset;
   info.first_instance = firstInstance;

   tu_draw(cmd_buffer, &info);
}

void
tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
                   VkBuffer _buffer,
                   VkDeviceSize offset,
                   uint32_t drawCount,
                   uint32_t stride)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
   struct tu_draw_info info = {};

   info.count = drawCount;
   info.indirect = buffer;
   info.indirect_offset = offset;
   info.stride = stride;

   tu_draw(cmd_buffer, &info);
}

void
tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
                          VkBuffer _buffer,
                          VkDeviceSize offset,
                          uint32_t drawCount,
                          uint32_t stride)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
   struct tu_draw_info info = {};

   info.indexed = true;
   info.count = drawCount;
   info.indirect = buffer;
   info.indirect_offset = offset;
   info.stride = stride;

   tu_draw(cmd_buffer, &info);
}
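/*
 * Compute dispatch follows the same pattern: the vkCmdDispatch* entry points
 * fill a tu_dispatch_info and call tu_dispatch(), which is still a stub.
 */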
struct tu_dispatch_info
{
   /**
    * Determine the layout of the grid (in block units) to be used.
    */
   uint32_t blocks[3];

   /**
    * A starting offset for the grid. If unaligned is set, the offset
    * must still be aligned.
    */
   uint32_t offsets[3];

   /**
    * Whether it's an unaligned compute dispatch.
    */
   bool unaligned;

   /**
    * Indirect compute parameters resource.
    */
   struct tu_buffer *indirect;
   uint64_t indirect_offset;
};

static void
tu_dispatch(struct tu_cmd_buffer *cmd_buffer,
            const struct tu_dispatch_info *info)
{
   /* TODO: not implemented yet */
}

void
tu_CmdDispatchBase(VkCommandBuffer commandBuffer,
                   uint32_t base_x,
                   uint32_t base_y,
                   uint32_t base_z,
                   uint32_t x,
                   uint32_t y,
                   uint32_t z)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   struct tu_dispatch_info info = {};

   info.blocks[0] = x;
   info.blocks[1] = y;
   info.blocks[2] = z;

   info.offsets[0] = base_x;
   info.offsets[1] = base_y;
   info.offsets[2] = base_z;
   tu_dispatch(cmd_buffer, &info);
}

void
tu_CmdDispatch(VkCommandBuffer commandBuffer,
               uint32_t x,
               uint32_t y,
               uint32_t z)
{
   tu_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
}

void
tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                       VkBuffer _buffer,
                       VkDeviceSize offset)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
   struct tu_dispatch_info info = {};

   info.indirect = buffer;
   info.indirect_offset = offset;

   tu_dispatch(cmd_buffer, &info);
}

void
tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);

   tu_cs_end(&cmd_buffer->draw_cs);

   tu_cmd_render_tiles(cmd_buffer);

   /* discard draw_cs entries now that the tiles are rendered */
   tu_cs_discard_entries(&cmd_buffer->draw_cs);

   vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
   cmd_buffer->state.attachments = NULL;

   cmd_buffer->state.pass = NULL;
   cmd_buffer->state.subpass = NULL;
   cmd_buffer->state.framebuffer = NULL;
}

void
tu_CmdEndRenderPass2KHR(VkCommandBuffer commandBuffer,
                        const VkSubpassEndInfoKHR *pSubpassEndInfo)
{
   tu_CmdEndRenderPass(commandBuffer);
}

struct tu_barrier_info
{
   uint32_t eventCount;
   const VkEvent *pEvents;
   VkPipelineStageFlags srcStageMask;
};

static void
tu_barrier(struct tu_cmd_buffer *cmd_buffer,
           uint32_t memoryBarrierCount,
           const VkMemoryBarrier *pMemoryBarriers,
           uint32_t bufferMemoryBarrierCount,
           const VkBufferMemoryBarrier *pBufferMemoryBarriers,
           uint32_t imageMemoryBarrierCount,
           const VkImageMemoryBarrier *pImageMemoryBarriers,
           const struct tu_barrier_info *info)
{
   /* TODO: not implemented yet */
}

void
tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
                      VkPipelineStageFlags srcStageMask,
                      VkPipelineStageFlags destStageMask,
                      VkBool32 byRegion,
                      uint32_t memoryBarrierCount,
                      const VkMemoryBarrier *pMemoryBarriers,
                      uint32_t bufferMemoryBarrierCount,
                      const VkBufferMemoryBarrier *pBufferMemoryBarriers,
                      uint32_t imageMemoryBarrierCount,
                      const VkImageMemoryBarrier *pImageMemoryBarriers)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   struct tu_barrier_info info;

   info.eventCount = 0;
   info.pEvents = NULL;
   info.srcStageMask = srcStageMask;

   tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
              bufferMemoryBarrierCount, pBufferMemoryBarriers,
              imageMemoryBarrierCount, pImageMemoryBarriers, &info);
}
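/*
 * Events are not wired up yet: write_event() is a stub, so vkCmdSetEvent and
 * vkCmdResetEvent record nothing for now. Presumably it will eventually emit
 * a GPU write of `value` into the event's BO once stageMask handling is in
 * place.
 */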
static void
write_event(struct tu_cmd_buffer *cmd_buffer,
            struct tu_event *event,
            VkPipelineStageFlags stageMask,
            unsigned value)
{
   /* TODO: not implemented yet */
}

void
tu_CmdSetEvent(VkCommandBuffer commandBuffer,
               VkEvent _event,
               VkPipelineStageFlags stageMask)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_event, event, _event);

   write_event(cmd_buffer, event, stageMask, 1);
}

void
tu_CmdResetEvent(VkCommandBuffer commandBuffer,
                 VkEvent _event,
                 VkPipelineStageFlags stageMask)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_event, event, _event);

   write_event(cmd_buffer, event, stageMask, 0);
}

void
tu_CmdWaitEvents(VkCommandBuffer commandBuffer,
                 uint32_t eventCount,
                 const VkEvent *pEvents,
                 VkPipelineStageFlags srcStageMask,
                 VkPipelineStageFlags dstStageMask,
                 uint32_t memoryBarrierCount,
                 const VkMemoryBarrier *pMemoryBarriers,
                 uint32_t bufferMemoryBarrierCount,
                 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
                 uint32_t imageMemoryBarrierCount,
                 const VkImageMemoryBarrier *pImageMemoryBarriers)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   struct tu_barrier_info info;

   info.eventCount = eventCount;
   info.pEvents = pEvents;
   info.srcStageMask = 0;

   tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
              bufferMemoryBarrierCount, pBufferMemoryBarriers,
              imageMemoryBarrierCount, pImageMemoryBarriers, &info);
}

void
tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
{
   /* No-op */
}