anv_blorp.c revision 7ec681f3
1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "anv_private.h" 25 26static bool 27lookup_blorp_shader(struct blorp_batch *batch, 28 const void *key, uint32_t key_size, 29 uint32_t *kernel_out, void *prog_data_out) 30{ 31 struct blorp_context *blorp = batch->blorp; 32 struct anv_device *device = blorp->driver_ctx; 33 34 /* The default cache must be a real cache */ 35 assert(device->default_pipeline_cache.cache); 36 37 struct anv_shader_bin *bin = 38 anv_pipeline_cache_search(&device->default_pipeline_cache, key, key_size); 39 if (!bin) 40 return false; 41 42 /* The cache already has a reference and it's not going anywhere so there 43 * is no need to hold a second reference. 44 */ 45 anv_shader_bin_unref(device, bin); 46 47 *kernel_out = bin->kernel.offset; 48 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data; 49 50 return true; 51} 52 53static bool 54upload_blorp_shader(struct blorp_batch *batch, uint32_t stage, 55 const void *key, uint32_t key_size, 56 const void *kernel, uint32_t kernel_size, 57 const struct brw_stage_prog_data *prog_data, 58 uint32_t prog_data_size, 59 uint32_t *kernel_out, void *prog_data_out) 60{ 61 struct blorp_context *blorp = batch->blorp; 62 struct anv_device *device = blorp->driver_ctx; 63 64 /* The blorp cache must be a real cache */ 65 assert(device->default_pipeline_cache.cache); 66 67 struct anv_pipeline_bind_map bind_map = { 68 .surface_count = 0, 69 .sampler_count = 0, 70 }; 71 72 struct anv_shader_bin *bin = 73 anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, stage, 74 key, key_size, kernel, kernel_size, 75 prog_data, prog_data_size, 76 NULL, 0, NULL, &bind_map); 77 78 if (!bin) 79 return false; 80 81 /* The cache already has a reference and it's not going anywhere so there 82 * is no need to hold a second reference. 83 */ 84 anv_shader_bin_unref(device, bin); 85 86 *kernel_out = bin->kernel.offset; 87 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data; 88 89 return true; 90} 91 92void 93anv_device_init_blorp(struct anv_device *device) 94{ 95 blorp_init(&device->blorp, device, &device->isl_dev); 96 device->blorp.compiler = device->physical->compiler; 97 device->blorp.lookup_shader = lookup_blorp_shader; 98 device->blorp.upload_shader = upload_blorp_shader; 99 switch (device->info.verx10) { 100 case 70: 101 device->blorp.exec = gfx7_blorp_exec; 102 break; 103 case 75: 104 device->blorp.exec = gfx75_blorp_exec; 105 break; 106 case 80: 107 device->blorp.exec = gfx8_blorp_exec; 108 break; 109 case 90: 110 device->blorp.exec = gfx9_blorp_exec; 111 break; 112 case 110: 113 device->blorp.exec = gfx11_blorp_exec; 114 break; 115 case 120: 116 device->blorp.exec = gfx12_blorp_exec; 117 break; 118 case 125: 119 device->blorp.exec = gfx125_blorp_exec; 120 break; 121 default: 122 unreachable("Unknown hardware generation"); 123 } 124} 125 126void 127anv_device_finish_blorp(struct anv_device *device) 128{ 129 blorp_finish(&device->blorp); 130} 131 132static void 133anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer, 134 struct blorp_batch *batch, enum blorp_batch_flags flags) 135{ 136 if (!(cmd_buffer->pool->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) { 137 assert(cmd_buffer->pool->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT); 138 flags |= BLORP_BATCH_USE_COMPUTE; 139 } 140 141 blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags); 142} 143 144static void 145anv_blorp_batch_finish(struct blorp_batch *batch) 146{ 147 blorp_batch_finish(batch); 148} 149 150static void 151get_blorp_surf_for_anv_buffer(struct anv_device *device, 152 struct anv_buffer *buffer, uint64_t offset, 153 uint32_t width, uint32_t height, 154 uint32_t row_pitch, enum isl_format format, 155 bool is_dest, 156 struct blorp_surf *blorp_surf, 157 struct isl_surf *isl_surf) 158{ 159 const struct isl_format_layout *fmtl = 160 isl_format_get_layout(format); 161 bool ok UNUSED; 162 163 /* ASTC is the only format which doesn't support linear layouts. 164 * Create an equivalently sized surface with ISL to get around this. 165 */ 166 if (fmtl->txc == ISL_TXC_ASTC) { 167 /* Use an equivalently sized format */ 168 format = ISL_FORMAT_R32G32B32A32_UINT; 169 assert(fmtl->bpb == isl_format_get_layout(format)->bpb); 170 171 /* Shrink the dimensions for the new format */ 172 width = DIV_ROUND_UP(width, fmtl->bw); 173 height = DIV_ROUND_UP(height, fmtl->bh); 174 } 175 176 *blorp_surf = (struct blorp_surf) { 177 .surf = isl_surf, 178 .addr = { 179 .buffer = buffer->address.bo, 180 .offset = buffer->address.offset + offset, 181 .mocs = anv_mocs(device, buffer->address.bo, 182 is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT 183 : ISL_SURF_USAGE_TEXTURE_BIT), 184 }, 185 }; 186 187 ok = isl_surf_init(&device->isl_dev, isl_surf, 188 .dim = ISL_SURF_DIM_2D, 189 .format = format, 190 .width = width, 191 .height = height, 192 .depth = 1, 193 .levels = 1, 194 .array_len = 1, 195 .samples = 1, 196 .row_pitch_B = row_pitch, 197 .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT 198 : ISL_SURF_USAGE_TEXTURE_BIT, 199 .tiling_flags = ISL_TILING_LINEAR_BIT); 200 assert(ok); 201} 202 203/* Pick something high enough that it won't be used in core and low enough it 204 * will never map to an extension. 205 */ 206#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000 207 208static struct blorp_address 209anv_to_blorp_address(struct anv_address addr) 210{ 211 return (struct blorp_address) { 212 .buffer = addr.bo, 213 .offset = addr.offset, 214 }; 215} 216 217static void 218get_blorp_surf_for_anv_image(const struct anv_device *device, 219 const struct anv_image *image, 220 VkImageAspectFlags aspect, 221 VkImageUsageFlags usage, 222 VkImageLayout layout, 223 enum isl_aux_usage aux_usage, 224 struct blorp_surf *blorp_surf) 225{ 226 const uint32_t plane = anv_image_aspect_to_plane(image, aspect); 227 228 if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) { 229 assert(usage != 0); 230 aux_usage = anv_layout_to_aux_usage(&device->info, image, 231 aspect, usage, layout); 232 } 233 234 isl_surf_usage_flags_t mocs_usage = 235 (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ? 236 ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT; 237 238 const struct anv_surface *surface = &image->planes[plane].primary_surface; 239 const struct anv_address address = 240 anv_image_address(image, &surface->memory_range); 241 242 *blorp_surf = (struct blorp_surf) { 243 .surf = &surface->isl, 244 .addr = { 245 .buffer = address.bo, 246 .offset = address.offset, 247 .mocs = anv_mocs(device, address.bo, mocs_usage), 248 }, 249 }; 250 251 if (aux_usage != ISL_AUX_USAGE_NONE) { 252 const struct anv_surface *aux_surface = &image->planes[plane].aux_surface; 253 const struct anv_address aux_address = 254 anv_image_address(image, &aux_surface->memory_range); 255 256 blorp_surf->aux_usage = aux_usage; 257 blorp_surf->aux_surf = &aux_surface->isl; 258 259 if (!anv_address_is_null(aux_address)) { 260 blorp_surf->aux_addr = (struct blorp_address) { 261 .buffer = aux_address.bo, 262 .offset = aux_address.offset, 263 .mocs = anv_mocs(device, aux_address.bo, 0), 264 }; 265 } 266 267 /* If we're doing a partial resolve, then we need the indirect clear 268 * color. If we are doing a fast clear and want to store/update the 269 * clear color, we also pass the address to blorp, otherwise it will only 270 * stomp the CCS to a particular value and won't care about format or 271 * clear value 272 */ 273 if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { 274 const struct anv_address clear_color_addr = 275 anv_image_get_clear_color_addr(device, image, aspect); 276 blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); 277 } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { 278 const struct anv_address clear_color_addr = 279 anv_image_get_clear_color_addr(device, image, aspect); 280 blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); 281 blorp_surf->clear_color = (union isl_color_value) { 282 .f32 = { ANV_HZ_FC_VAL }, 283 }; 284 } 285 } 286} 287 288static bool 289get_blorp_surf_for_anv_shadow_image(const struct anv_device *device, 290 const struct anv_image *image, 291 VkImageAspectFlags aspect, 292 struct blorp_surf *blorp_surf) 293{ 294 295 const uint32_t plane = anv_image_aspect_to_plane(image, aspect); 296 if (!anv_surface_is_valid(&image->planes[plane].shadow_surface)) 297 return false; 298 299 const struct anv_surface *surface = &image->planes[plane].shadow_surface; 300 const struct anv_address address = 301 anv_image_address(image, &surface->memory_range); 302 303 *blorp_surf = (struct blorp_surf) { 304 .surf = &surface->isl, 305 .addr = { 306 .buffer = address.bo, 307 .offset = address.offset, 308 .mocs = anv_mocs(device, address.bo, ISL_SURF_USAGE_RENDER_TARGET_BIT), 309 }, 310 }; 311 312 return true; 313} 314 315static void 316copy_image(struct anv_cmd_buffer *cmd_buffer, 317 struct blorp_batch *batch, 318 struct anv_image *src_image, 319 VkImageLayout src_image_layout, 320 struct anv_image *dst_image, 321 VkImageLayout dst_image_layout, 322 const VkImageCopy2KHR *region) 323{ 324 VkOffset3D srcOffset = 325 anv_sanitize_image_offset(src_image->vk.image_type, region->srcOffset); 326 VkOffset3D dstOffset = 327 anv_sanitize_image_offset(dst_image->vk.image_type, region->dstOffset); 328 VkExtent3D extent = 329 anv_sanitize_image_extent(src_image->vk.image_type, region->extent); 330 331 const uint32_t dst_level = region->dstSubresource.mipLevel; 332 unsigned dst_base_layer, layer_count; 333 if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) { 334 dst_base_layer = region->dstOffset.z; 335 layer_count = region->extent.depth; 336 } else { 337 dst_base_layer = region->dstSubresource.baseArrayLayer; 338 layer_count = vk_image_subresource_layer_count(&dst_image->vk, 339 ®ion->dstSubresource); 340 } 341 342 const uint32_t src_level = region->srcSubresource.mipLevel; 343 unsigned src_base_layer; 344 if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) { 345 src_base_layer = region->srcOffset.z; 346 } else { 347 src_base_layer = region->srcSubresource.baseArrayLayer; 348 assert(layer_count == 349 vk_image_subresource_layer_count(&src_image->vk, 350 ®ion->srcSubresource)); 351 } 352 353 VkImageAspectFlags src_mask = region->srcSubresource.aspectMask, 354 dst_mask = region->dstSubresource.aspectMask; 355 356 assert(anv_image_aspects_compatible(src_mask, dst_mask)); 357 358 if (util_bitcount(src_mask) > 1) { 359 anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) { 360 struct blorp_surf src_surf, dst_surf; 361 get_blorp_surf_for_anv_image(cmd_buffer->device, 362 src_image, 1UL << aspect_bit, 363 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 364 src_image_layout, ISL_AUX_USAGE_NONE, 365 &src_surf); 366 get_blorp_surf_for_anv_image(cmd_buffer->device, 367 dst_image, 1UL << aspect_bit, 368 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 369 dst_image_layout, ISL_AUX_USAGE_NONE, 370 &dst_surf); 371 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 372 1UL << aspect_bit, 373 dst_surf.aux_usage, dst_level, 374 dst_base_layer, layer_count); 375 376 for (unsigned i = 0; i < layer_count; i++) { 377 blorp_copy(batch, &src_surf, src_level, src_base_layer + i, 378 &dst_surf, dst_level, dst_base_layer + i, 379 srcOffset.x, srcOffset.y, 380 dstOffset.x, dstOffset.y, 381 extent.width, extent.height); 382 } 383 384 struct blorp_surf dst_shadow_surf; 385 if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, 386 dst_image, 387 1UL << aspect_bit, 388 &dst_shadow_surf)) { 389 for (unsigned i = 0; i < layer_count; i++) { 390 blorp_copy(batch, &src_surf, src_level, src_base_layer + i, 391 &dst_shadow_surf, dst_level, dst_base_layer + i, 392 srcOffset.x, srcOffset.y, 393 dstOffset.x, dstOffset.y, 394 extent.width, extent.height); 395 } 396 } 397 } 398 } else { 399 struct blorp_surf src_surf, dst_surf; 400 get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask, 401 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 402 src_image_layout, ISL_AUX_USAGE_NONE, 403 &src_surf); 404 get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask, 405 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 406 dst_image_layout, ISL_AUX_USAGE_NONE, 407 &dst_surf); 408 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask, 409 dst_surf.aux_usage, dst_level, 410 dst_base_layer, layer_count); 411 412 for (unsigned i = 0; i < layer_count; i++) { 413 blorp_copy(batch, &src_surf, src_level, src_base_layer + i, 414 &dst_surf, dst_level, dst_base_layer + i, 415 srcOffset.x, srcOffset.y, 416 dstOffset.x, dstOffset.y, 417 extent.width, extent.height); 418 } 419 420 struct blorp_surf dst_shadow_surf; 421 if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, 422 dst_image, dst_mask, 423 &dst_shadow_surf)) { 424 for (unsigned i = 0; i < layer_count; i++) { 425 blorp_copy(batch, &src_surf, src_level, src_base_layer + i, 426 &dst_shadow_surf, dst_level, dst_base_layer + i, 427 srcOffset.x, srcOffset.y, 428 dstOffset.x, dstOffset.y, 429 extent.width, extent.height); 430 } 431 } 432 } 433} 434 435void anv_CmdCopyImage2KHR( 436 VkCommandBuffer commandBuffer, 437 const VkCopyImageInfo2KHR* pCopyImageInfo) 438{ 439 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 440 ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage); 441 ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage); 442 443 struct blorp_batch batch; 444 anv_blorp_batch_init(cmd_buffer, &batch, 0); 445 446 for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) { 447 copy_image(cmd_buffer, &batch, 448 src_image, pCopyImageInfo->srcImageLayout, 449 dst_image, pCopyImageInfo->dstImageLayout, 450 &pCopyImageInfo->pRegions[r]); 451 } 452 453 anv_blorp_batch_finish(&batch); 454} 455 456static enum isl_format 457isl_format_for_size(unsigned size_B) 458{ 459 /* Prefer 32-bit per component formats for CmdFillBuffer */ 460 switch (size_B) { 461 case 1: return ISL_FORMAT_R8_UINT; 462 case 2: return ISL_FORMAT_R16_UINT; 463 case 3: return ISL_FORMAT_R8G8B8_UINT; 464 case 4: return ISL_FORMAT_R32_UINT; 465 case 6: return ISL_FORMAT_R16G16B16_UINT; 466 case 8: return ISL_FORMAT_R32G32_UINT; 467 case 12: return ISL_FORMAT_R32G32B32_UINT; 468 case 16: return ISL_FORMAT_R32G32B32A32_UINT; 469 default: 470 unreachable("Unknown format size"); 471 } 472} 473 474static void 475copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, 476 struct blorp_batch *batch, 477 struct anv_buffer *anv_buffer, 478 struct anv_image *anv_image, 479 VkImageLayout image_layout, 480 const VkBufferImageCopy2KHR* region, 481 bool buffer_to_image) 482{ 483 struct { 484 struct blorp_surf surf; 485 uint32_t level; 486 VkOffset3D offset; 487 } image, buffer, *src, *dst; 488 489 buffer.level = 0; 490 buffer.offset = (VkOffset3D) { 0, 0, 0 }; 491 492 if (buffer_to_image) { 493 src = &buffer; 494 dst = ℑ 495 } else { 496 src = ℑ 497 dst = &buffer; 498 } 499 500 const VkImageAspectFlags aspect = region->imageSubresource.aspectMask; 501 502 get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect, 503 buffer_to_image ? 504 VK_IMAGE_USAGE_TRANSFER_DST_BIT : 505 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 506 image_layout, ISL_AUX_USAGE_NONE, 507 &image.surf); 508 image.offset = 509 anv_sanitize_image_offset(anv_image->vk.image_type, region->imageOffset); 510 image.level = region->imageSubresource.mipLevel; 511 512 VkExtent3D extent = 513 anv_sanitize_image_extent(anv_image->vk.image_type, region->imageExtent); 514 if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) { 515 image.offset.z = region->imageSubresource.baseArrayLayer; 516 extent.depth = 517 vk_image_subresource_layer_count(&anv_image->vk, 518 ®ion->imageSubresource); 519 } 520 521 const enum isl_format linear_format = 522 anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk.format, 523 aspect, VK_IMAGE_TILING_LINEAR); 524 const struct isl_format_layout *linear_fmtl = 525 isl_format_get_layout(linear_format); 526 527 const uint32_t buffer_row_length = 528 region->bufferRowLength ? 529 region->bufferRowLength : extent.width; 530 531 const uint32_t buffer_image_height = 532 region->bufferImageHeight ? 533 region->bufferImageHeight : extent.height; 534 535 const uint32_t buffer_row_pitch = 536 DIV_ROUND_UP(buffer_row_length, linear_fmtl->bw) * 537 (linear_fmtl->bpb / 8); 538 539 const uint32_t buffer_layer_stride = 540 DIV_ROUND_UP(buffer_image_height, linear_fmtl->bh) * 541 buffer_row_pitch; 542 543 /* Some formats have additional restrictions which may cause ISL to 544 * fail to create a surface for us. Some examples include: 545 * 546 * 1. ASTC formats are not allowed to be LINEAR and must be tiled 547 * 2. YCbCr formats have to have 2-pixel aligned strides 548 * 549 * To avoid these issues, we always bind the buffer as if it's a 550 * "normal" format like RGBA32_UINT. Since we're using blorp_copy, 551 * the format doesn't matter as long as it has the right bpb. 552 */ 553 const VkExtent2D buffer_extent = { 554 .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw), 555 .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh), 556 }; 557 const enum isl_format buffer_format = 558 isl_format_for_size(linear_fmtl->bpb / 8); 559 560 struct isl_surf buffer_isl_surf; 561 get_blorp_surf_for_anv_buffer(cmd_buffer->device, 562 anv_buffer, region->bufferOffset, 563 buffer_extent.width, buffer_extent.height, 564 buffer_row_pitch, buffer_format, false, 565 &buffer.surf, &buffer_isl_surf); 566 567 bool dst_has_shadow = false; 568 struct blorp_surf dst_shadow_surf; 569 if (&image == dst) { 570 /* In this case, the source is the buffer and, since blorp takes its 571 * copy dimensions in terms of the source format, we have to use the 572 * scaled down version for compressed textures because the source 573 * format is an RGB format. 574 */ 575 extent.width = buffer_extent.width; 576 extent.height = buffer_extent.height; 577 578 anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image, 579 aspect, dst->surf.aux_usage, 580 dst->level, 581 dst->offset.z, extent.depth); 582 583 dst_has_shadow = 584 get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, 585 anv_image, aspect, 586 &dst_shadow_surf); 587 } 588 589 for (unsigned z = 0; z < extent.depth; z++) { 590 blorp_copy(batch, &src->surf, src->level, src->offset.z, 591 &dst->surf, dst->level, dst->offset.z, 592 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y, 593 extent.width, extent.height); 594 595 if (dst_has_shadow) { 596 blorp_copy(batch, &src->surf, src->level, src->offset.z, 597 &dst_shadow_surf, dst->level, dst->offset.z, 598 src->offset.x, src->offset.y, 599 dst->offset.x, dst->offset.y, 600 extent.width, extent.height); 601 } 602 603 image.offset.z++; 604 buffer.surf.addr.offset += buffer_layer_stride; 605 } 606} 607 608void anv_CmdCopyBufferToImage2KHR( 609 VkCommandBuffer commandBuffer, 610 const VkCopyBufferToImageInfo2KHR* pCopyBufferToImageInfo) 611{ 612 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 613 ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); 614 ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage); 615 616 struct blorp_batch batch; 617 anv_blorp_batch_init(cmd_buffer, &batch, 0); 618 619 for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { 620 copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image, 621 pCopyBufferToImageInfo->dstImageLayout, 622 &pCopyBufferToImageInfo->pRegions[r], true); 623 } 624 625 anv_blorp_batch_finish(&batch); 626} 627 628void anv_CmdCopyImageToBuffer2KHR( 629 VkCommandBuffer commandBuffer, 630 const VkCopyImageToBufferInfo2KHR* pCopyImageToBufferInfo) 631{ 632 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 633 ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage); 634 ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer); 635 636 struct blorp_batch batch; 637 anv_blorp_batch_init(cmd_buffer, &batch, 0); 638 639 for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) { 640 copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image, 641 pCopyImageToBufferInfo->srcImageLayout, 642 &pCopyImageToBufferInfo->pRegions[r], false); 643 } 644 645 anv_blorp_batch_finish(&batch); 646 647 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 648} 649 650static bool 651flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1) 652{ 653 bool flip = false; 654 if (*src0 > *src1) { 655 unsigned tmp = *src0; 656 *src0 = *src1; 657 *src1 = tmp; 658 flip = !flip; 659 } 660 661 if (*dst0 > *dst1) { 662 unsigned tmp = *dst0; 663 *dst0 = *dst1; 664 *dst1 = tmp; 665 flip = !flip; 666 } 667 668 return flip; 669} 670 671static void 672blit_image(struct anv_cmd_buffer *cmd_buffer, 673 struct blorp_batch *batch, 674 struct anv_image *src_image, 675 VkImageLayout src_image_layout, 676 struct anv_image *dst_image, 677 VkImageLayout dst_image_layout, 678 const VkImageBlit2KHR *region, 679 VkFilter filter) 680{ 681 const VkImageSubresourceLayers *src_res = ®ion->srcSubresource; 682 const VkImageSubresourceLayers *dst_res = ®ion->dstSubresource; 683 684 struct blorp_surf src, dst; 685 686 enum blorp_filter blorp_filter; 687 switch (filter) { 688 case VK_FILTER_NEAREST: 689 blorp_filter = BLORP_FILTER_NEAREST; 690 break; 691 case VK_FILTER_LINEAR: 692 blorp_filter = BLORP_FILTER_BILINEAR; 693 break; 694 default: 695 unreachable("Invalid filter"); 696 } 697 698 assert(anv_image_aspects_compatible(src_res->aspectMask, 699 dst_res->aspectMask)); 700 701 anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) { 702 get_blorp_surf_for_anv_image(cmd_buffer->device, 703 src_image, 1U << aspect_bit, 704 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 705 src_image_layout, ISL_AUX_USAGE_NONE, &src); 706 get_blorp_surf_for_anv_image(cmd_buffer->device, 707 dst_image, 1U << aspect_bit, 708 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 709 dst_image_layout, ISL_AUX_USAGE_NONE, &dst); 710 711 struct anv_format_plane src_format = 712 anv_get_format_aspect(&cmd_buffer->device->info, src_image->vk.format, 713 1U << aspect_bit, src_image->vk.tiling); 714 struct anv_format_plane dst_format = 715 anv_get_format_aspect(&cmd_buffer->device->info, dst_image->vk.format, 716 1U << aspect_bit, dst_image->vk.tiling); 717 718 unsigned dst_start, dst_end; 719 if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) { 720 assert(dst_res->baseArrayLayer == 0); 721 dst_start = region->dstOffsets[0].z; 722 dst_end = region->dstOffsets[1].z; 723 } else { 724 dst_start = dst_res->baseArrayLayer; 725 dst_end = dst_start + 726 vk_image_subresource_layer_count(&dst_image->vk, dst_res); 727 } 728 729 unsigned src_start, src_end; 730 if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) { 731 assert(src_res->baseArrayLayer == 0); 732 src_start = region->srcOffsets[0].z; 733 src_end = region->srcOffsets[1].z; 734 } else { 735 src_start = src_res->baseArrayLayer; 736 src_end = src_start + 737 vk_image_subresource_layer_count(&src_image->vk, src_res); 738 } 739 740 bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); 741 const unsigned num_layers = dst_end - dst_start; 742 float src_z_step = (float)(src_end - src_start) / (float)num_layers; 743 744 /* There is no interpolation to the pixel center during rendering, so 745 * add the 0.5 offset ourselves here. */ 746 float depth_center_offset = 0; 747 if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) 748 depth_center_offset = 0.5 / num_layers * (src_end - src_start); 749 750 if (flip_z) { 751 src_start = src_end; 752 src_z_step *= -1; 753 depth_center_offset *= -1; 754 } 755 756 unsigned src_x0 = region->srcOffsets[0].x; 757 unsigned src_x1 = region->srcOffsets[1].x; 758 unsigned dst_x0 = region->dstOffsets[0].x; 759 unsigned dst_x1 = region->dstOffsets[1].x; 760 bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1); 761 762 unsigned src_y0 = region->srcOffsets[0].y; 763 unsigned src_y1 = region->srcOffsets[1].y; 764 unsigned dst_y0 = region->dstOffsets[0].y; 765 unsigned dst_y1 = region->dstOffsets[1].y; 766 bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1); 767 768 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 769 1U << aspect_bit, 770 dst.aux_usage, 771 dst_res->mipLevel, 772 dst_start, num_layers); 773 774 for (unsigned i = 0; i < num_layers; i++) { 775 unsigned dst_z = dst_start + i; 776 float src_z = src_start + i * src_z_step + depth_center_offset; 777 778 blorp_blit(batch, &src, src_res->mipLevel, src_z, 779 src_format.isl_format, src_format.swizzle, 780 &dst, dst_res->mipLevel, dst_z, 781 dst_format.isl_format, dst_format.swizzle, 782 src_x0, src_y0, src_x1, src_y1, 783 dst_x0, dst_y0, dst_x1, dst_y1, 784 blorp_filter, flip_x, flip_y); 785 } 786 } 787} 788 789void anv_CmdBlitImage2KHR( 790 VkCommandBuffer commandBuffer, 791 const VkBlitImageInfo2KHR* pBlitImageInfo) 792{ 793 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 794 ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage); 795 ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage); 796 797 struct blorp_batch batch; 798 anv_blorp_batch_init(cmd_buffer, &batch, 0); 799 800 for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) { 801 blit_image(cmd_buffer, &batch, 802 src_image, pBlitImageInfo->srcImageLayout, 803 dst_image, pBlitImageInfo->dstImageLayout, 804 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter); 805 } 806 807 anv_blorp_batch_finish(&batch); 808} 809 810/** 811 * Returns the greatest common divisor of a and b that is a power of two. 812 */ 813static uint64_t 814gcd_pow2_u64(uint64_t a, uint64_t b) 815{ 816 assert(a > 0 || b > 0); 817 818 unsigned a_log2 = ffsll(a) - 1; 819 unsigned b_log2 = ffsll(b) - 1; 820 821 /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which 822 * case, the MIN2() will take the other one. If both are 0 then we will 823 * hit the assert above. 824 */ 825 return 1 << MIN2(a_log2, b_log2); 826} 827 828/* This is maximum possible width/height our HW can handle */ 829#define MAX_SURFACE_DIM (1ull << 14) 830 831static void 832copy_buffer(struct anv_device *device, 833 struct blorp_batch *batch, 834 struct anv_buffer *src_buffer, 835 struct anv_buffer *dst_buffer, 836 const VkBufferCopy2KHR *region) 837{ 838 struct blorp_address src = { 839 .buffer = src_buffer->address.bo, 840 .offset = src_buffer->address.offset + region->srcOffset, 841 .mocs = anv_mocs(device, src_buffer->address.bo, 842 ISL_SURF_USAGE_TEXTURE_BIT), 843 }; 844 struct blorp_address dst = { 845 .buffer = dst_buffer->address.bo, 846 .offset = dst_buffer->address.offset + region->dstOffset, 847 .mocs = anv_mocs(device, dst_buffer->address.bo, 848 ISL_SURF_USAGE_RENDER_TARGET_BIT), 849 }; 850 851 blorp_buffer_copy(batch, src, dst, region->size); 852} 853 854void anv_CmdCopyBuffer2KHR( 855 VkCommandBuffer commandBuffer, 856 const VkCopyBufferInfo2KHR* pCopyBufferInfo) 857{ 858 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 859 ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer); 860 ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer); 861 862 struct blorp_batch batch; 863 anv_blorp_batch_init(cmd_buffer, &batch, 0); 864 865 for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) { 866 copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer, 867 &pCopyBufferInfo->pRegions[r]); 868 } 869 870 anv_blorp_batch_finish(&batch); 871 872 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 873} 874 875 876void anv_CmdUpdateBuffer( 877 VkCommandBuffer commandBuffer, 878 VkBuffer dstBuffer, 879 VkDeviceSize dstOffset, 880 VkDeviceSize dataSize, 881 const void* pData) 882{ 883 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 884 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); 885 886 struct blorp_batch batch; 887 anv_blorp_batch_init(cmd_buffer, &batch, 0); 888 889 /* We can't quite grab a full block because the state stream needs a 890 * little data at the top to build its linked list. 891 */ 892 const uint32_t max_update_size = 893 cmd_buffer->device->dynamic_state_pool.block_size - 64; 894 895 assert(max_update_size < MAX_SURFACE_DIM * 4); 896 897 /* We're about to read data that was written from the CPU. Flush the 898 * texture cache so we don't get anything stale. 899 */ 900 anv_add_pending_pipe_bits(cmd_buffer, 901 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, 902 "before UpdateBuffer"); 903 904 while (dataSize) { 905 const uint32_t copy_size = MIN2(dataSize, max_update_size); 906 907 struct anv_state tmp_data = 908 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); 909 910 memcpy(tmp_data.map, pData, copy_size); 911 912 struct blorp_address src = { 913 .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, 914 .offset = tmp_data.offset, 915 .mocs = isl_mocs(&cmd_buffer->device->isl_dev, 916 ISL_SURF_USAGE_TEXTURE_BIT, false) 917 }; 918 struct blorp_address dst = { 919 .buffer = dst_buffer->address.bo, 920 .offset = dst_buffer->address.offset + dstOffset, 921 .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo, 922 ISL_SURF_USAGE_RENDER_TARGET_BIT), 923 }; 924 925 blorp_buffer_copy(&batch, src, dst, copy_size); 926 927 dataSize -= copy_size; 928 dstOffset += copy_size; 929 pData = (void *)pData + copy_size; 930 } 931 932 anv_blorp_batch_finish(&batch); 933 934 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 935} 936 937void anv_CmdFillBuffer( 938 VkCommandBuffer commandBuffer, 939 VkBuffer dstBuffer, 940 VkDeviceSize dstOffset, 941 VkDeviceSize fillSize, 942 uint32_t data) 943{ 944 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 945 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); 946 struct blorp_surf surf; 947 struct isl_surf isl_surf; 948 949 struct blorp_batch batch; 950 anv_blorp_batch_init(cmd_buffer, &batch, 0); 951 952 fillSize = anv_buffer_get_range(dst_buffer, dstOffset, fillSize); 953 954 /* From the Vulkan spec: 955 * 956 * "size is the number of bytes to fill, and must be either a multiple 957 * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of 958 * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the 959 * buffer is not a multiple of 4, then the nearest smaller multiple is 960 * used." 961 */ 962 fillSize &= ~3ull; 963 964 /* First, we compute the biggest format that can be used with the 965 * given offsets and size. 966 */ 967 int bs = 16; 968 bs = gcd_pow2_u64(bs, dstOffset); 969 bs = gcd_pow2_u64(bs, fillSize); 970 enum isl_format isl_format = isl_format_for_size(bs); 971 972 union isl_color_value color = { 973 .u32 = { data, data, data, data }, 974 }; 975 976 const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; 977 while (fillSize >= max_fill_size) { 978 get_blorp_surf_for_anv_buffer(cmd_buffer->device, 979 dst_buffer, dstOffset, 980 MAX_SURFACE_DIM, MAX_SURFACE_DIM, 981 MAX_SURFACE_DIM * bs, isl_format, true, 982 &surf, &isl_surf); 983 984 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, 985 0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM, 986 color, 0 /* color_write_disable */); 987 fillSize -= max_fill_size; 988 dstOffset += max_fill_size; 989 } 990 991 uint64_t height = fillSize / (MAX_SURFACE_DIM * bs); 992 assert(height < MAX_SURFACE_DIM); 993 if (height != 0) { 994 const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs; 995 get_blorp_surf_for_anv_buffer(cmd_buffer->device, 996 dst_buffer, dstOffset, 997 MAX_SURFACE_DIM, height, 998 MAX_SURFACE_DIM * bs, isl_format, true, 999 &surf, &isl_surf); 1000 1001 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, 1002 0, 0, 1, 0, 0, MAX_SURFACE_DIM, height, 1003 color, 0 /* color_write_disable */); 1004 fillSize -= rect_fill_size; 1005 dstOffset += rect_fill_size; 1006 } 1007 1008 if (fillSize != 0) { 1009 const uint32_t width = fillSize / bs; 1010 get_blorp_surf_for_anv_buffer(cmd_buffer->device, 1011 dst_buffer, dstOffset, 1012 width, 1, 1013 width * bs, isl_format, true, 1014 &surf, &isl_surf); 1015 1016 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, 1017 0, 0, 1, 0, 0, width, 1, 1018 color, 0 /* color_write_disable */); 1019 } 1020 1021 anv_blorp_batch_finish(&batch); 1022 1023 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 1024} 1025 1026void anv_CmdClearColorImage( 1027 VkCommandBuffer commandBuffer, 1028 VkImage _image, 1029 VkImageLayout imageLayout, 1030 const VkClearColorValue* pColor, 1031 uint32_t rangeCount, 1032 const VkImageSubresourceRange* pRanges) 1033{ 1034 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 1035 ANV_FROM_HANDLE(anv_image, image, _image); 1036 1037 struct blorp_batch batch; 1038 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1039 1040 for (unsigned r = 0; r < rangeCount; r++) { 1041 if (pRanges[r].aspectMask == 0) 1042 continue; 1043 1044 assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); 1045 1046 struct blorp_surf surf; 1047 get_blorp_surf_for_anv_image(cmd_buffer->device, 1048 image, pRanges[r].aspectMask, 1049 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 1050 imageLayout, ISL_AUX_USAGE_NONE, &surf); 1051 1052 struct anv_format_plane src_format = 1053 anv_get_format_aspect(&cmd_buffer->device->info, image->vk.format, 1054 VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling); 1055 1056 unsigned base_layer = pRanges[r].baseArrayLayer; 1057 uint32_t layer_count = 1058 vk_image_subresource_layer_count(&image->vk, &pRanges[r]); 1059 uint32_t level_count = 1060 vk_image_subresource_level_count(&image->vk, &pRanges[r]); 1061 1062 for (uint32_t i = 0; i < level_count; i++) { 1063 const unsigned level = pRanges[r].baseMipLevel + i; 1064 const unsigned level_width = anv_minify(image->vk.extent.width, level); 1065 const unsigned level_height = anv_minify(image->vk.extent.height, level); 1066 1067 if (image->vk.image_type == VK_IMAGE_TYPE_3D) { 1068 base_layer = 0; 1069 layer_count = anv_minify(image->vk.extent.depth, level); 1070 } 1071 1072 anv_cmd_buffer_mark_image_written(cmd_buffer, image, 1073 pRanges[r].aspectMask, 1074 surf.aux_usage, level, 1075 base_layer, layer_count); 1076 1077 blorp_clear(&batch, &surf, 1078 src_format.isl_format, src_format.swizzle, 1079 level, base_layer, layer_count, 1080 0, 0, level_width, level_height, 1081 vk_to_isl_color(*pColor), 0 /* color_write_disable */); 1082 } 1083 } 1084 1085 anv_blorp_batch_finish(&batch); 1086} 1087 1088void anv_CmdClearDepthStencilImage( 1089 VkCommandBuffer commandBuffer, 1090 VkImage image_h, 1091 VkImageLayout imageLayout, 1092 const VkClearDepthStencilValue* pDepthStencil, 1093 uint32_t rangeCount, 1094 const VkImageSubresourceRange* pRanges) 1095{ 1096 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 1097 ANV_FROM_HANDLE(anv_image, image, image_h); 1098 1099 struct blorp_batch batch; 1100 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1101 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0); 1102 1103 struct blorp_surf depth, stencil, stencil_shadow; 1104 if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { 1105 get_blorp_surf_for_anv_image(cmd_buffer->device, 1106 image, VK_IMAGE_ASPECT_DEPTH_BIT, 1107 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 1108 imageLayout, ISL_AUX_USAGE_NONE, &depth); 1109 } else { 1110 memset(&depth, 0, sizeof(depth)); 1111 } 1112 1113 bool has_stencil_shadow = false; 1114 if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1115 get_blorp_surf_for_anv_image(cmd_buffer->device, 1116 image, VK_IMAGE_ASPECT_STENCIL_BIT, 1117 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 1118 imageLayout, ISL_AUX_USAGE_NONE, &stencil); 1119 1120 has_stencil_shadow = 1121 get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image, 1122 VK_IMAGE_ASPECT_STENCIL_BIT, 1123 &stencil_shadow); 1124 } else { 1125 memset(&stencil, 0, sizeof(stencil)); 1126 } 1127 1128 for (unsigned r = 0; r < rangeCount; r++) { 1129 if (pRanges[r].aspectMask == 0) 1130 continue; 1131 1132 bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT; 1133 bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT; 1134 1135 unsigned base_layer = pRanges[r].baseArrayLayer; 1136 uint32_t layer_count = 1137 vk_image_subresource_layer_count(&image->vk, &pRanges[r]); 1138 uint32_t level_count = 1139 vk_image_subresource_level_count(&image->vk, &pRanges[r]); 1140 1141 for (uint32_t i = 0; i < level_count; i++) { 1142 const unsigned level = pRanges[r].baseMipLevel + i; 1143 const unsigned level_width = anv_minify(image->vk.extent.width, level); 1144 const unsigned level_height = anv_minify(image->vk.extent.height, level); 1145 1146 if (image->vk.image_type == VK_IMAGE_TYPE_3D) 1147 layer_count = anv_minify(image->vk.extent.depth, level); 1148 1149 blorp_clear_depth_stencil(&batch, &depth, &stencil, 1150 level, base_layer, layer_count, 1151 0, 0, level_width, level_height, 1152 clear_depth, pDepthStencil->depth, 1153 clear_stencil ? 0xff : 0, 1154 pDepthStencil->stencil); 1155 1156 if (clear_stencil && has_stencil_shadow) { 1157 union isl_color_value stencil_color = { 1158 .u32 = { pDepthStencil->stencil, }, 1159 }; 1160 blorp_clear(&batch, &stencil_shadow, 1161 ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY, 1162 level, base_layer, layer_count, 1163 0, 0, level_width, level_height, 1164 stencil_color, 0 /* color_write_disable */); 1165 } 1166 } 1167 } 1168 1169 anv_blorp_batch_finish(&batch); 1170} 1171 1172VkResult 1173anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer, 1174 uint32_t num_entries, 1175 uint32_t *state_offset, 1176 struct anv_state *bt_state) 1177{ 1178 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries, 1179 state_offset); 1180 if (bt_state->map == NULL) { 1181 /* We ran out of space. Grab a new binding table block. */ 1182 VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); 1183 if (result != VK_SUCCESS) 1184 return result; 1185 1186 /* Re-emit state base addresses so we get the new surface state base 1187 * address before we start emitting binding tables etc. 1188 */ 1189 anv_cmd_buffer_emit_state_base_address(cmd_buffer); 1190 1191 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries, 1192 state_offset); 1193 assert(bt_state->map != NULL); 1194 } 1195 1196 return VK_SUCCESS; 1197} 1198 1199static VkResult 1200binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer, 1201 struct anv_state surface_state, 1202 uint32_t *bt_offset) 1203{ 1204 uint32_t state_offset; 1205 struct anv_state bt_state; 1206 1207 VkResult result = 1208 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset, 1209 &bt_state); 1210 if (result != VK_SUCCESS) 1211 return result; 1212 1213 uint32_t *bt_map = bt_state.map; 1214 bt_map[0] = surface_state.offset + state_offset; 1215 1216 *bt_offset = bt_state.offset; 1217 return VK_SUCCESS; 1218} 1219 1220static void 1221clear_color_attachment(struct anv_cmd_buffer *cmd_buffer, 1222 struct blorp_batch *batch, 1223 const VkClearAttachment *attachment, 1224 uint32_t rectCount, const VkClearRect *pRects) 1225{ 1226 const struct anv_subpass *subpass = cmd_buffer->state.subpass; 1227 const uint32_t color_att = attachment->colorAttachment; 1228 assert(color_att < subpass->color_count); 1229 const uint32_t att_idx = subpass->color_attachments[color_att].attachment; 1230 1231 if (att_idx == VK_ATTACHMENT_UNUSED) 1232 return; 1233 1234 struct anv_render_pass_attachment *pass_att = 1235 &cmd_buffer->state.pass->attachments[att_idx]; 1236 struct anv_attachment_state *att_state = 1237 &cmd_buffer->state.attachments[att_idx]; 1238 1239 uint32_t binding_table; 1240 VkResult result = 1241 binding_table_for_surface_state(cmd_buffer, att_state->color.state, 1242 &binding_table); 1243 if (result != VK_SUCCESS) 1244 return; 1245 1246 union isl_color_value clear_color = 1247 vk_to_isl_color(attachment->clearValue.color); 1248 1249 /* If multiview is enabled we ignore baseArrayLayer and layerCount */ 1250 if (subpass->view_mask) { 1251 u_foreach_bit(view_idx, subpass->view_mask) { 1252 for (uint32_t r = 0; r < rectCount; ++r) { 1253 const VkOffset2D offset = pRects[r].rect.offset; 1254 const VkExtent2D extent = pRects[r].rect.extent; 1255 blorp_clear_attachments(batch, binding_table, 1256 ISL_FORMAT_UNSUPPORTED, pass_att->samples, 1257 view_idx, 1, 1258 offset.x, offset.y, 1259 offset.x + extent.width, 1260 offset.y + extent.height, 1261 true, clear_color, false, 0.0f, 0, 0); 1262 } 1263 } 1264 return; 1265 } 1266 1267 for (uint32_t r = 0; r < rectCount; ++r) { 1268 const VkOffset2D offset = pRects[r].rect.offset; 1269 const VkExtent2D extent = pRects[r].rect.extent; 1270 assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS); 1271 blorp_clear_attachments(batch, binding_table, 1272 ISL_FORMAT_UNSUPPORTED, pass_att->samples, 1273 pRects[r].baseArrayLayer, 1274 pRects[r].layerCount, 1275 offset.x, offset.y, 1276 offset.x + extent.width, offset.y + extent.height, 1277 true, clear_color, false, 0.0f, 0, 0); 1278 } 1279} 1280 1281static void 1282clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer, 1283 struct blorp_batch *batch, 1284 const VkClearAttachment *attachment, 1285 uint32_t rectCount, const VkClearRect *pRects) 1286{ 1287 static const union isl_color_value color_value = { .u32 = { 0, } }; 1288 const struct anv_subpass *subpass = cmd_buffer->state.subpass; 1289 if (!subpass->depth_stencil_attachment) 1290 return; 1291 1292 const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; 1293 assert(att_idx != VK_ATTACHMENT_UNUSED); 1294 struct anv_render_pass_attachment *pass_att = 1295 &cmd_buffer->state.pass->attachments[att_idx]; 1296 1297 bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT; 1298 bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT; 1299 1300 enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED; 1301 if (clear_depth) { 1302 depth_format = anv_get_isl_format(&cmd_buffer->device->info, 1303 pass_att->format, 1304 VK_IMAGE_ASPECT_DEPTH_BIT, 1305 VK_IMAGE_TILING_OPTIMAL); 1306 } 1307 1308 uint32_t binding_table; 1309 VkResult result = 1310 binding_table_for_surface_state(cmd_buffer, 1311 cmd_buffer->state.null_surface_state, 1312 &binding_table); 1313 if (result != VK_SUCCESS) 1314 return; 1315 1316 /* If multiview is enabled we ignore baseArrayLayer and layerCount */ 1317 if (subpass->view_mask) { 1318 u_foreach_bit(view_idx, subpass->view_mask) { 1319 for (uint32_t r = 0; r < rectCount; ++r) { 1320 const VkOffset2D offset = pRects[r].rect.offset; 1321 const VkExtent2D extent = pRects[r].rect.extent; 1322 VkClearDepthStencilValue value = attachment->clearValue.depthStencil; 1323 blorp_clear_attachments(batch, binding_table, 1324 depth_format, pass_att->samples, 1325 view_idx, 1, 1326 offset.x, offset.y, 1327 offset.x + extent.width, 1328 offset.y + extent.height, 1329 false, color_value, 1330 clear_depth, value.depth, 1331 clear_stencil ? 0xff : 0, value.stencil); 1332 } 1333 } 1334 return; 1335 } 1336 1337 for (uint32_t r = 0; r < rectCount; ++r) { 1338 const VkOffset2D offset = pRects[r].rect.offset; 1339 const VkExtent2D extent = pRects[r].rect.extent; 1340 VkClearDepthStencilValue value = attachment->clearValue.depthStencil; 1341 assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS); 1342 blorp_clear_attachments(batch, binding_table, 1343 depth_format, pass_att->samples, 1344 pRects[r].baseArrayLayer, 1345 pRects[r].layerCount, 1346 offset.x, offset.y, 1347 offset.x + extent.width, offset.y + extent.height, 1348 false, color_value, 1349 clear_depth, value.depth, 1350 clear_stencil ? 0xff : 0, value.stencil); 1351 } 1352} 1353 1354void anv_CmdClearAttachments( 1355 VkCommandBuffer commandBuffer, 1356 uint32_t attachmentCount, 1357 const VkClearAttachment* pAttachments, 1358 uint32_t rectCount, 1359 const VkClearRect* pRects) 1360{ 1361 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 1362 1363 /* Because this gets called within a render pass, we tell blorp not to 1364 * trash our depth and stencil buffers. 1365 */ 1366 struct blorp_batch batch; 1367 enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; 1368 if (cmd_buffer->state.conditional_render_enabled) { 1369 anv_cmd_emit_conditional_render_predicate(cmd_buffer); 1370 flags |= BLORP_BATCH_PREDICATE_ENABLE; 1371 } 1372 anv_blorp_batch_init(cmd_buffer, &batch, flags); 1373 1374 for (uint32_t a = 0; a < attachmentCount; ++a) { 1375 if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { 1376 assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); 1377 clear_color_attachment(cmd_buffer, &batch, 1378 &pAttachments[a], 1379 rectCount, pRects); 1380 } else { 1381 clear_depth_stencil_attachment(cmd_buffer, &batch, 1382 &pAttachments[a], 1383 rectCount, pRects); 1384 } 1385 } 1386 1387 anv_blorp_batch_finish(&batch); 1388} 1389 1390enum subpass_stage { 1391 SUBPASS_STAGE_LOAD, 1392 SUBPASS_STAGE_DRAW, 1393 SUBPASS_STAGE_RESOLVE, 1394}; 1395 1396void 1397anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, 1398 const struct anv_image *src_image, 1399 enum isl_aux_usage src_aux_usage, 1400 uint32_t src_level, uint32_t src_base_layer, 1401 const struct anv_image *dst_image, 1402 enum isl_aux_usage dst_aux_usage, 1403 uint32_t dst_level, uint32_t dst_base_layer, 1404 VkImageAspectFlagBits aspect, 1405 uint32_t src_x, uint32_t src_y, 1406 uint32_t dst_x, uint32_t dst_y, 1407 uint32_t width, uint32_t height, 1408 uint32_t layer_count, 1409 enum blorp_filter filter) 1410{ 1411 struct blorp_batch batch; 1412 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1413 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0); 1414 1415 assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D); 1416 assert(src_image->vk.samples > 1); 1417 assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D); 1418 assert(dst_image->vk.samples == 1); 1419 assert(src_image->n_planes == dst_image->n_planes); 1420 1421 struct blorp_surf src_surf, dst_surf; 1422 get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect, 1423 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 1424 ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1425 src_aux_usage, &src_surf); 1426 if (src_aux_usage == ISL_AUX_USAGE_MCS) { 1427 src_surf.clear_color_addr = anv_to_blorp_address( 1428 anv_image_get_clear_color_addr(cmd_buffer->device, src_image, 1429 VK_IMAGE_ASPECT_COLOR_BIT)); 1430 } 1431 get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect, 1432 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 1433 ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1434 dst_aux_usage, &dst_surf); 1435 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 1436 aspect, dst_aux_usage, 1437 dst_level, dst_base_layer, layer_count); 1438 1439 if (filter == BLORP_FILTER_NONE) { 1440 /* If no explicit filter is provided, then it's implied by the type of 1441 * the source image. 1442 */ 1443 if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) || 1444 (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) || 1445 isl_format_has_int_channel(src_surf.surf->format)) { 1446 filter = BLORP_FILTER_SAMPLE_0; 1447 } else { 1448 filter = BLORP_FILTER_AVERAGE; 1449 } 1450 } 1451 1452 for (uint32_t l = 0; l < layer_count; l++) { 1453 blorp_blit(&batch, 1454 &src_surf, src_level, src_base_layer + l, 1455 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, 1456 &dst_surf, dst_level, dst_base_layer + l, 1457 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, 1458 src_x, src_y, src_x + width, src_y + height, 1459 dst_x, dst_y, dst_x + width, dst_y + height, 1460 filter, false, false); 1461 } 1462 1463 anv_blorp_batch_finish(&batch); 1464} 1465 1466static void 1467resolve_image(struct anv_cmd_buffer *cmd_buffer, 1468 struct anv_image *src_image, 1469 VkImageLayout src_image_layout, 1470 struct anv_image *dst_image, 1471 VkImageLayout dst_image_layout, 1472 const VkImageResolve2KHR *region) 1473{ 1474 assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask); 1475 assert(vk_image_subresource_layer_count(&src_image->vk, ®ion->srcSubresource) == 1476 vk_image_subresource_layer_count(&dst_image->vk, ®ion->dstSubresource)); 1477 1478 const uint32_t layer_count = 1479 vk_image_subresource_layer_count(&dst_image->vk, ®ion->dstSubresource); 1480 1481 anv_foreach_image_aspect_bit(aspect_bit, src_image, 1482 region->srcSubresource.aspectMask) { 1483 enum isl_aux_usage src_aux_usage = 1484 anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image, 1485 (1 << aspect_bit), 1486 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 1487 src_image_layout); 1488 enum isl_aux_usage dst_aux_usage = 1489 anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image, 1490 (1 << aspect_bit), 1491 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 1492 dst_image_layout); 1493 1494 anv_image_msaa_resolve(cmd_buffer, 1495 src_image, src_aux_usage, 1496 region->srcSubresource.mipLevel, 1497 region->srcSubresource.baseArrayLayer, 1498 dst_image, dst_aux_usage, 1499 region->dstSubresource.mipLevel, 1500 region->dstSubresource.baseArrayLayer, 1501 (1 << aspect_bit), 1502 region->srcOffset.x, 1503 region->srcOffset.y, 1504 region->dstOffset.x, 1505 region->dstOffset.y, 1506 region->extent.width, 1507 region->extent.height, 1508 layer_count, BLORP_FILTER_NONE); 1509 } 1510} 1511 1512void anv_CmdResolveImage2KHR( 1513 VkCommandBuffer commandBuffer, 1514 const VkResolveImageInfo2KHR* pResolveImageInfo) 1515{ 1516 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 1517 ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage); 1518 ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage); 1519 1520 for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) { 1521 resolve_image(cmd_buffer, 1522 src_image, pResolveImageInfo->srcImageLayout, 1523 dst_image, pResolveImageInfo->dstImageLayout, 1524 &pResolveImageInfo->pRegions[r]); 1525 } 1526} 1527 1528void 1529anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, 1530 const struct anv_image *image, 1531 VkImageAspectFlagBits aspect, 1532 uint32_t base_level, uint32_t level_count, 1533 uint32_t base_layer, uint32_t layer_count) 1534{ 1535 struct blorp_batch batch; 1536 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1537 1538 /* We don't know who touched the main surface last so flush a bunch of 1539 * caches to ensure we get good data. 1540 */ 1541 anv_add_pending_pipe_bits(cmd_buffer, 1542 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | 1543 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | 1544 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 1545 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, 1546 "before copy_to_shadow"); 1547 1548 struct blorp_surf surf; 1549 get_blorp_surf_for_anv_image(cmd_buffer->device, 1550 image, aspect, 1551 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 1552 VK_IMAGE_LAYOUT_GENERAL, 1553 ISL_AUX_USAGE_NONE, &surf); 1554 assert(surf.aux_usage == ISL_AUX_USAGE_NONE); 1555 1556 struct blorp_surf shadow_surf; 1557 get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, 1558 image, aspect, &shadow_surf); 1559 1560 for (uint32_t l = 0; l < level_count; l++) { 1561 const uint32_t level = base_level + l; 1562 1563 const VkExtent3D extent = vk_image_mip_level_extent(&image->vk, level); 1564 1565 if (image->vk.image_type == VK_IMAGE_TYPE_3D) 1566 layer_count = extent.depth; 1567 1568 for (uint32_t a = 0; a < layer_count; a++) { 1569 const uint32_t layer = base_layer + a; 1570 1571 blorp_copy(&batch, &surf, level, layer, 1572 &shadow_surf, level, layer, 1573 0, 0, 0, 0, extent.width, extent.height); 1574 } 1575 } 1576 1577 /* We just wrote to the buffer with the render cache. Flush it. */ 1578 anv_add_pending_pipe_bits(cmd_buffer, 1579 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, 1580 "after copy_to_shadow"); 1581 1582 anv_blorp_batch_finish(&batch); 1583} 1584 1585void 1586anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer, 1587 const struct anv_image *image, 1588 VkImageAspectFlagBits aspect, 1589 enum isl_aux_usage aux_usage, 1590 enum isl_format format, struct isl_swizzle swizzle, 1591 uint32_t level, uint32_t base_layer, uint32_t layer_count, 1592 VkRect2D area, union isl_color_value clear_color) 1593{ 1594 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT); 1595 1596 /* We don't support planar images with multisampling yet */ 1597 assert(image->n_planes == 1); 1598 1599 struct blorp_batch batch; 1600 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1601 1602 struct blorp_surf surf; 1603 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, 1604 VK_IMAGE_USAGE_TRANSFER_DST_BIT, 1605 ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1606 aux_usage, &surf); 1607 anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage, 1608 level, base_layer, layer_count); 1609 1610 blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle), 1611 level, base_layer, layer_count, 1612 area.offset.x, area.offset.y, 1613 area.offset.x + area.extent.width, 1614 area.offset.y + area.extent.height, 1615 clear_color, 0 /* color_write_disable */); 1616 1617 anv_blorp_batch_finish(&batch); 1618} 1619 1620void 1621anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, 1622 const struct anv_image *image, 1623 VkImageAspectFlags aspects, 1624 enum isl_aux_usage depth_aux_usage, 1625 uint32_t level, 1626 uint32_t base_layer, uint32_t layer_count, 1627 VkRect2D area, 1628 float depth_value, uint8_t stencil_value) 1629{ 1630 assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | 1631 VK_IMAGE_ASPECT_STENCIL_BIT)); 1632 1633 struct blorp_batch batch; 1634 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1635 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0); 1636 1637 struct blorp_surf depth = {}; 1638 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { 1639 get_blorp_surf_for_anv_image(cmd_buffer->device, 1640 image, VK_IMAGE_ASPECT_DEPTH_BIT, 1641 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1642 depth_aux_usage, &depth); 1643 } 1644 1645 struct blorp_surf stencil = {}; 1646 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1647 const uint32_t plane = 1648 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT); 1649 get_blorp_surf_for_anv_image(cmd_buffer->device, 1650 image, VK_IMAGE_ASPECT_STENCIL_BIT, 1651 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1652 image->planes[plane].aux_usage, &stencil); 1653 } 1654 1655 /* Blorp may choose to clear stencil using RGBA32_UINT for better 1656 * performance. If it does this, we need to flush it out of the depth 1657 * cache before rendering to it. 1658 */ 1659 anv_add_pending_pipe_bits(cmd_buffer, 1660 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | 1661 ANV_PIPE_END_OF_PIPE_SYNC_BIT, 1662 "before clear DS"); 1663 1664 blorp_clear_depth_stencil(&batch, &depth, &stencil, 1665 level, base_layer, layer_count, 1666 area.offset.x, area.offset.y, 1667 area.offset.x + area.extent.width, 1668 area.offset.y + area.extent.height, 1669 aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 1670 depth_value, 1671 (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0, 1672 stencil_value); 1673 1674 /* Blorp may choose to clear stencil using RGBA32_UINT for better 1675 * performance. If it does this, we need to flush it out of the render 1676 * cache before someone starts trying to do stencil on it. 1677 */ 1678 anv_add_pending_pipe_bits(cmd_buffer, 1679 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 1680 ANV_PIPE_END_OF_PIPE_SYNC_BIT, 1681 "after clear DS"); 1682 1683 struct blorp_surf stencil_shadow; 1684 if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 1685 get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image, 1686 VK_IMAGE_ASPECT_STENCIL_BIT, 1687 &stencil_shadow)) { 1688 union isl_color_value stencil_color = { 1689 .u32 = { stencil_value }, 1690 }; 1691 blorp_clear(&batch, &stencil_shadow, 1692 ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY, 1693 level, base_layer, layer_count, 1694 area.offset.x, area.offset.y, 1695 area.offset.x + area.extent.width, 1696 area.offset.y + area.extent.height, 1697 stencil_color, 0 /* color_write_disable */); 1698 } 1699 1700 anv_blorp_batch_finish(&batch); 1701} 1702 1703void 1704anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, 1705 const struct anv_image *image, 1706 VkImageAspectFlagBits aspect, uint32_t level, 1707 uint32_t base_layer, uint32_t layer_count, 1708 enum isl_aux_op hiz_op) 1709{ 1710 assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT); 1711 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level)); 1712 const uint32_t plane = anv_image_aspect_to_plane(image, aspect); 1713 assert(plane == 0); 1714 1715 struct blorp_batch batch; 1716 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1717 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0); 1718 1719 struct blorp_surf surf; 1720 get_blorp_surf_for_anv_image(cmd_buffer->device, 1721 image, VK_IMAGE_ASPECT_DEPTH_BIT, 1722 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1723 image->planes[plane].aux_usage, &surf); 1724 1725 blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op); 1726 1727 anv_blorp_batch_finish(&batch); 1728} 1729 1730void 1731anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, 1732 const struct anv_image *image, 1733 VkImageAspectFlags aspects, 1734 uint32_t level, 1735 uint32_t base_layer, uint32_t layer_count, 1736 VkRect2D area, uint8_t stencil_value) 1737{ 1738 assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | 1739 VK_IMAGE_ASPECT_STENCIL_BIT)); 1740 1741 struct blorp_batch batch; 1742 anv_blorp_batch_init(cmd_buffer, &batch, 0); 1743 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0); 1744 1745 struct blorp_surf depth = {}; 1746 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { 1747 const uint32_t plane = 1748 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT); 1749 assert(base_layer + layer_count <= 1750 anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level)); 1751 get_blorp_surf_for_anv_image(cmd_buffer->device, 1752 image, VK_IMAGE_ASPECT_DEPTH_BIT, 1753 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1754 image->planes[plane].aux_usage, &depth); 1755 } 1756 1757 struct blorp_surf stencil = {}; 1758 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1759 const uint32_t plane = 1760 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT); 1761 get_blorp_surf_for_anv_image(cmd_buffer->device, 1762 image, VK_IMAGE_ASPECT_STENCIL_BIT, 1763 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1764 image->planes[plane].aux_usage, &stencil); 1765 } 1766 1767 /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear": 1768 * 1769 * "The following is required when performing a depth buffer clear with 1770 * using the WM_STATE or 3DSTATE_WM: 1771 * 1772 * * If other rendering operations have preceded this clear, a 1773 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit 1774 * enabled must be issued before the rectangle primitive used for 1775 * the depth buffer clear operation. 1776 * * [...]" 1777 * 1778 * Even though the PRM only says that this is required if using 3DSTATE_WM 1779 * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional 1780 * hangs when doing a clear with WM_HZ_OP. 1781 */ 1782 anv_add_pending_pipe_bits(cmd_buffer, 1783 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | 1784 ANV_PIPE_DEPTH_STALL_BIT, 1785 "before clear hiz"); 1786 1787 if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && 1788 depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) { 1789 /* From Bspec 47010 (Depth Buffer Clear): 1790 * 1791 * Since the fast clear cycles to CCS are not cached in TileCache, 1792 * any previous depth buffer writes to overlapping pixels must be 1793 * flushed out of TileCache before a succeeding Depth Buffer Clear. 1794 * This restriction only applies to Depth Buffer with write-thru 1795 * enabled, since fast clears to CCS only occur for write-thru mode. 1796 * 1797 * There may have been a write to this depth buffer. Flush it from the 1798 * tile cache just in case. 1799 */ 1800 anv_add_pending_pipe_bits(cmd_buffer, 1801 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | 1802 ANV_PIPE_TILE_CACHE_FLUSH_BIT, 1803 "before clear hiz_ccs_wt"); 1804 } 1805 1806 blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil, 1807 level, base_layer, layer_count, 1808 area.offset.x, area.offset.y, 1809 area.offset.x + area.extent.width, 1810 area.offset.y + area.extent.height, 1811 aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 1812 ANV_HZ_FC_VAL, 1813 aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 1814 stencil_value); 1815 1816 anv_blorp_batch_finish(&batch); 1817 1818 /* From the SKL PRM, Depth Buffer Clear: 1819 * 1820 * "Depth Buffer Clear Workaround 1821 * 1822 * Depth buffer clear pass using any of the methods (WM_STATE, 1823 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL 1824 * command with DEPTH_STALL bit and Depth FLUSH bits “set” before 1825 * starting to render. DepthStall and DepthFlush are not needed between 1826 * consecutive depth clear passes nor is it required if the depth-clear 1827 * pass was done with “full_surf_clear” bit set in the 1828 * 3DSTATE_WM_HZ_OP." 1829 * 1830 * Even though the PRM provides a bunch of conditions under which this is 1831 * supposedly unnecessary, we choose to perform the flush unconditionally 1832 * just to be safe. 1833 */ 1834 anv_add_pending_pipe_bits(cmd_buffer, 1835 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | 1836 ANV_PIPE_DEPTH_STALL_BIT, 1837 "after clear hiz"); 1838} 1839 1840void 1841anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, 1842 const struct anv_image *image, 1843 enum isl_format format, struct isl_swizzle swizzle, 1844 VkImageAspectFlagBits aspect, 1845 uint32_t base_layer, uint32_t layer_count, 1846 enum isl_aux_op mcs_op, union isl_color_value *clear_value, 1847 bool predicate) 1848{ 1849 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT); 1850 assert(image->vk.samples > 1); 1851 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0)); 1852 1853 /* Multisampling with multi-planar formats is not supported */ 1854 assert(image->n_planes == 1); 1855 1856 struct blorp_batch batch; 1857 anv_blorp_batch_init(cmd_buffer, &batch, 1858 BLORP_BATCH_PREDICATE_ENABLE * predicate + 1859 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value); 1860 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0); 1861 1862 struct blorp_surf surf; 1863 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, 1864 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1865 ISL_AUX_USAGE_MCS, &surf); 1866 1867 /* Blorp will store the clear color for us if we provide the clear color 1868 * address and we are doing a fast clear. So we save the clear value into 1869 * the blorp surface. 1870 */ 1871 if (clear_value) 1872 surf.clear_color = *clear_value; 1873 1874 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": 1875 * 1876 * "After Render target fast clear, pipe-control with color cache 1877 * write-flush must be issued before sending any DRAW commands on 1878 * that render target." 1879 * 1880 * This comment is a bit cryptic and doesn't really tell you what's going 1881 * or what's really needed. It appears that fast clear ops are not 1882 * properly synchronized with other drawing. This means that we cannot 1883 * have a fast clear operation in the pipe at the same time as other 1884 * regular drawing operations. We need to use a PIPE_CONTROL to ensure 1885 * that the contents of the previous draw hit the render target before we 1886 * resolve and then use a second PIPE_CONTROL after the resolve to ensure 1887 * that it is completed before any additional drawing occurs. 1888 */ 1889 anv_add_pending_pipe_bits(cmd_buffer, 1890 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 1891 ANV_PIPE_TILE_CACHE_FLUSH_BIT | 1892 ANV_PIPE_END_OF_PIPE_SYNC_BIT, 1893 "before fast clear mcs"); 1894 1895 switch (mcs_op) { 1896 case ISL_AUX_OP_FAST_CLEAR: 1897 blorp_fast_clear(&batch, &surf, format, swizzle, 1898 0, base_layer, layer_count, 1899 0, 0, image->vk.extent.width, image->vk.extent.height); 1900 break; 1901 case ISL_AUX_OP_PARTIAL_RESOLVE: 1902 blorp_mcs_partial_resolve(&batch, &surf, format, 1903 base_layer, layer_count); 1904 break; 1905 case ISL_AUX_OP_FULL_RESOLVE: 1906 case ISL_AUX_OP_AMBIGUATE: 1907 default: 1908 unreachable("Unsupported MCS operation"); 1909 } 1910 1911 anv_add_pending_pipe_bits(cmd_buffer, 1912 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 1913 ANV_PIPE_END_OF_PIPE_SYNC_BIT, 1914 "after fast clear mcs"); 1915 1916 anv_blorp_batch_finish(&batch); 1917} 1918 1919void 1920anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, 1921 const struct anv_image *image, 1922 enum isl_format format, struct isl_swizzle swizzle, 1923 VkImageAspectFlagBits aspect, uint32_t level, 1924 uint32_t base_layer, uint32_t layer_count, 1925 enum isl_aux_op ccs_op, union isl_color_value *clear_value, 1926 bool predicate) 1927{ 1928 assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); 1929 assert(image->vk.samples == 1); 1930 assert(level < anv_image_aux_levels(image, aspect)); 1931 /* Multi-LOD YcBcR is not allowed */ 1932 assert(image->n_planes == 1 || level == 0); 1933 assert(base_layer + layer_count <= 1934 anv_image_aux_layers(image, aspect, level)); 1935 1936 const uint32_t plane = anv_image_aspect_to_plane(image, aspect); 1937 1938 struct blorp_batch batch; 1939 anv_blorp_batch_init(cmd_buffer, &batch, 1940 BLORP_BATCH_PREDICATE_ENABLE * predicate + 1941 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value); 1942 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0); 1943 1944 struct blorp_surf surf; 1945 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, 1946 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1947 image->planes[plane].aux_usage, 1948 &surf); 1949 1950 uint32_t level_width = anv_minify(surf.surf->logical_level0_px.w, level); 1951 uint32_t level_height = anv_minify(surf.surf->logical_level0_px.h, level); 1952 1953 /* Blorp will store the clear color for us if we provide the clear color 1954 * address and we are doing a fast clear. So we save the clear value into 1955 * the blorp surface. 1956 */ 1957 if (clear_value) 1958 surf.clear_color = *clear_value; 1959 1960 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": 1961 * 1962 * "After Render target fast clear, pipe-control with color cache 1963 * write-flush must be issued before sending any DRAW commands on 1964 * that render target." 1965 * 1966 * This comment is a bit cryptic and doesn't really tell you what's going 1967 * or what's really needed. It appears that fast clear ops are not 1968 * properly synchronized with other drawing. This means that we cannot 1969 * have a fast clear operation in the pipe at the same time as other 1970 * regular drawing operations. We need to use a PIPE_CONTROL to ensure 1971 * that the contents of the previous draw hit the render target before we 1972 * resolve and then use a second PIPE_CONTROL after the resolve to ensure 1973 * that it is completed before any additional drawing occurs. 1974 */ 1975 anv_add_pending_pipe_bits(cmd_buffer, 1976 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 1977 ANV_PIPE_TILE_CACHE_FLUSH_BIT | 1978 ANV_PIPE_END_OF_PIPE_SYNC_BIT, 1979 "before fast clear ccs"); 1980 1981 switch (ccs_op) { 1982 case ISL_AUX_OP_FAST_CLEAR: 1983 blorp_fast_clear(&batch, &surf, format, swizzle, 1984 level, base_layer, layer_count, 1985 0, 0, level_width, level_height); 1986 break; 1987 case ISL_AUX_OP_FULL_RESOLVE: 1988 case ISL_AUX_OP_PARTIAL_RESOLVE: 1989 blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count, 1990 format, ccs_op); 1991 break; 1992 case ISL_AUX_OP_AMBIGUATE: 1993 for (uint32_t a = 0; a < layer_count; a++) { 1994 const uint32_t layer = base_layer + a; 1995 blorp_ccs_ambiguate(&batch, &surf, level, layer); 1996 } 1997 break; 1998 default: 1999 unreachable("Unsupported CCS operation"); 2000 } 2001 2002 anv_add_pending_pipe_bits(cmd_buffer, 2003 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | 2004 ANV_PIPE_END_OF_PIPE_SYNC_BIT, 2005 "after fast clear ccs"); 2006 2007 anv_blorp_batch_finish(&batch); 2008} 2009