/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "anv_private.h" 25b8e80941Smrg 26b8e80941Smrgstatic bool 27b8e80941Smrglookup_blorp_shader(struct blorp_batch *batch, 28b8e80941Smrg const void *key, uint32_t key_size, 29b8e80941Smrg uint32_t *kernel_out, void *prog_data_out) 30b8e80941Smrg{ 31b8e80941Smrg struct blorp_context *blorp = batch->blorp; 32b8e80941Smrg struct anv_device *device = blorp->driver_ctx; 33b8e80941Smrg 34b8e80941Smrg /* The default cache must be a real cache */ 35b8e80941Smrg assert(device->default_pipeline_cache.cache); 36b8e80941Smrg 37b8e80941Smrg struct anv_shader_bin *bin = 38b8e80941Smrg anv_pipeline_cache_search(&device->default_pipeline_cache, key, key_size); 39b8e80941Smrg if (!bin) 40b8e80941Smrg return false; 41b8e80941Smrg 42b8e80941Smrg /* The cache already has a reference and it's not going anywhere so there 43b8e80941Smrg * is no need to hold a second reference. 44b8e80941Smrg */ 45b8e80941Smrg anv_shader_bin_unref(device, bin); 46b8e80941Smrg 47b8e80941Smrg *kernel_out = bin->kernel.offset; 48b8e80941Smrg *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data; 49b8e80941Smrg 50b8e80941Smrg return true; 51b8e80941Smrg} 52b8e80941Smrg 53b8e80941Smrgstatic bool 54b8e80941Smrgupload_blorp_shader(struct blorp_batch *batch, 55b8e80941Smrg const void *key, uint32_t key_size, 56b8e80941Smrg const void *kernel, uint32_t kernel_size, 57b8e80941Smrg const struct brw_stage_prog_data *prog_data, 58b8e80941Smrg uint32_t prog_data_size, 59b8e80941Smrg uint32_t *kernel_out, void *prog_data_out) 60b8e80941Smrg{ 61b8e80941Smrg struct blorp_context *blorp = batch->blorp; 62b8e80941Smrg struct anv_device *device = blorp->driver_ctx; 63b8e80941Smrg 64b8e80941Smrg /* The blorp cache must be a real cache */ 65b8e80941Smrg assert(device->default_pipeline_cache.cache); 66b8e80941Smrg 67b8e80941Smrg struct anv_pipeline_bind_map bind_map = { 68b8e80941Smrg .surface_count = 0, 69b8e80941Smrg .sampler_count = 0, 70b8e80941Smrg }; 
71b8e80941Smrg 72b8e80941Smrg struct anv_shader_bin *bin = 73b8e80941Smrg anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, 74b8e80941Smrg key, key_size, kernel, kernel_size, 75b8e80941Smrg NULL, 0, 76b8e80941Smrg prog_data, prog_data_size, 77b8e80941Smrg NULL, &bind_map); 78b8e80941Smrg 79b8e80941Smrg if (!bin) 80b8e80941Smrg return false; 81b8e80941Smrg 82b8e80941Smrg /* The cache already has a reference and it's not going anywhere so there 83b8e80941Smrg * is no need to hold a second reference. 84b8e80941Smrg */ 85b8e80941Smrg anv_shader_bin_unref(device, bin); 86b8e80941Smrg 87b8e80941Smrg *kernel_out = bin->kernel.offset; 88b8e80941Smrg *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data; 89b8e80941Smrg 90b8e80941Smrg return true; 91b8e80941Smrg} 92b8e80941Smrg 93b8e80941Smrgvoid 94b8e80941Smrganv_device_init_blorp(struct anv_device *device) 95b8e80941Smrg{ 96b8e80941Smrg blorp_init(&device->blorp, device, &device->isl_dev); 97b8e80941Smrg device->blorp.compiler = device->instance->physicalDevice.compiler; 98b8e80941Smrg device->blorp.lookup_shader = lookup_blorp_shader; 99b8e80941Smrg device->blorp.upload_shader = upload_blorp_shader; 100b8e80941Smrg switch (device->info.gen) { 101b8e80941Smrg case 7: 102b8e80941Smrg if (device->info.is_haswell) { 103b8e80941Smrg device->blorp.exec = gen75_blorp_exec; 104b8e80941Smrg } else { 105b8e80941Smrg device->blorp.exec = gen7_blorp_exec; 106b8e80941Smrg } 107b8e80941Smrg break; 108b8e80941Smrg case 8: 109b8e80941Smrg device->blorp.exec = gen8_blorp_exec; 110b8e80941Smrg break; 111b8e80941Smrg case 9: 112b8e80941Smrg device->blorp.exec = gen9_blorp_exec; 113b8e80941Smrg break; 114b8e80941Smrg case 10: 115b8e80941Smrg device->blorp.exec = gen10_blorp_exec; 116b8e80941Smrg break; 117b8e80941Smrg case 11: 118b8e80941Smrg device->blorp.exec = gen11_blorp_exec; 119b8e80941Smrg break; 120b8e80941Smrg default: 121b8e80941Smrg unreachable("Unknown hardware generation"); 122b8e80941Smrg } 
123b8e80941Smrg} 124b8e80941Smrg 125b8e80941Smrgvoid 126b8e80941Smrganv_device_finish_blorp(struct anv_device *device) 127b8e80941Smrg{ 128b8e80941Smrg blorp_finish(&device->blorp); 129b8e80941Smrg} 130b8e80941Smrg 131b8e80941Smrgstatic void 132b8e80941Smrgget_blorp_surf_for_anv_buffer(struct anv_device *device, 133b8e80941Smrg struct anv_buffer *buffer, uint64_t offset, 134b8e80941Smrg uint32_t width, uint32_t height, 135b8e80941Smrg uint32_t row_pitch, enum isl_format format, 136b8e80941Smrg struct blorp_surf *blorp_surf, 137b8e80941Smrg struct isl_surf *isl_surf) 138b8e80941Smrg{ 139b8e80941Smrg const struct isl_format_layout *fmtl = 140b8e80941Smrg isl_format_get_layout(format); 141b8e80941Smrg bool ok UNUSED; 142b8e80941Smrg 143b8e80941Smrg /* ASTC is the only format which doesn't support linear layouts. 144b8e80941Smrg * Create an equivalently sized surface with ISL to get around this. 145b8e80941Smrg */ 146b8e80941Smrg if (fmtl->txc == ISL_TXC_ASTC) { 147b8e80941Smrg /* Use an equivalently sized format */ 148b8e80941Smrg format = ISL_FORMAT_R32G32B32A32_UINT; 149b8e80941Smrg assert(fmtl->bpb == isl_format_get_layout(format)->bpb); 150b8e80941Smrg 151b8e80941Smrg /* Shrink the dimensions for the new format */ 152b8e80941Smrg width = DIV_ROUND_UP(width, fmtl->bw); 153b8e80941Smrg height = DIV_ROUND_UP(height, fmtl->bh); 154b8e80941Smrg } 155b8e80941Smrg 156b8e80941Smrg *blorp_surf = (struct blorp_surf) { 157b8e80941Smrg .surf = isl_surf, 158b8e80941Smrg .addr = { 159b8e80941Smrg .buffer = buffer->address.bo, 160b8e80941Smrg .offset = buffer->address.offset + offset, 161b8e80941Smrg .mocs = anv_mocs_for_bo(device, buffer->address.bo), 162b8e80941Smrg }, 163b8e80941Smrg }; 164b8e80941Smrg 165b8e80941Smrg ok = isl_surf_init(&device->isl_dev, isl_surf, 166b8e80941Smrg .dim = ISL_SURF_DIM_2D, 167b8e80941Smrg .format = format, 168b8e80941Smrg .width = width, 169b8e80941Smrg .height = height, 170b8e80941Smrg .depth = 1, 171b8e80941Smrg .levels = 1, 172b8e80941Smrg 
.array_len = 1, 173b8e80941Smrg .samples = 1, 174b8e80941Smrg .row_pitch_B = row_pitch, 175b8e80941Smrg .usage = ISL_SURF_USAGE_TEXTURE_BIT | 176b8e80941Smrg ISL_SURF_USAGE_RENDER_TARGET_BIT, 177b8e80941Smrg .tiling_flags = ISL_TILING_LINEAR_BIT); 178b8e80941Smrg assert(ok); 179b8e80941Smrg} 180b8e80941Smrg 181b8e80941Smrg/* Pick something high enough that it won't be used in core and low enough it 182b8e80941Smrg * will never map to an extension. 183b8e80941Smrg */ 184b8e80941Smrg#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000 185b8e80941Smrg 186b8e80941Smrgstatic struct blorp_address 187b8e80941Smrganv_to_blorp_address(struct anv_address addr) 188b8e80941Smrg{ 189b8e80941Smrg return (struct blorp_address) { 190b8e80941Smrg .buffer = addr.bo, 191b8e80941Smrg .offset = addr.offset, 192b8e80941Smrg }; 193b8e80941Smrg} 194b8e80941Smrg 195b8e80941Smrgstatic void 196b8e80941Smrgget_blorp_surf_for_anv_image(const struct anv_device *device, 197b8e80941Smrg const struct anv_image *image, 198b8e80941Smrg VkImageAspectFlags aspect, 199b8e80941Smrg VkImageLayout layout, 200b8e80941Smrg enum isl_aux_usage aux_usage, 201b8e80941Smrg struct blorp_surf *blorp_surf) 202b8e80941Smrg{ 203b8e80941Smrg uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); 204b8e80941Smrg 205b8e80941Smrg if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) 206b8e80941Smrg aux_usage = anv_layout_to_aux_usage(&device->info, image, aspect, layout); 207b8e80941Smrg 208b8e80941Smrg const struct anv_surface *surface = &image->planes[plane].surface; 209b8e80941Smrg *blorp_surf = (struct blorp_surf) { 210b8e80941Smrg .surf = &surface->isl, 211b8e80941Smrg .addr = { 212b8e80941Smrg .buffer = image->planes[plane].address.bo, 213b8e80941Smrg .offset = image->planes[plane].address.offset + surface->offset, 214b8e80941Smrg .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), 215b8e80941Smrg }, 216b8e80941Smrg }; 217b8e80941Smrg 218b8e80941Smrg if (aux_usage != ISL_AUX_USAGE_NONE) { 
219b8e80941Smrg const struct anv_surface *aux_surface = &image->planes[plane].aux_surface; 220b8e80941Smrg blorp_surf->aux_surf = &aux_surface->isl, 221b8e80941Smrg blorp_surf->aux_addr = (struct blorp_address) { 222b8e80941Smrg .buffer = image->planes[plane].address.bo, 223b8e80941Smrg .offset = image->planes[plane].address.offset + aux_surface->offset, 224b8e80941Smrg .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), 225b8e80941Smrg }; 226b8e80941Smrg blorp_surf->aux_usage = aux_usage; 227b8e80941Smrg 228b8e80941Smrg /* If we're doing a partial resolve, then we need the indirect clear 229b8e80941Smrg * color. If we are doing a fast clear and want to store/update the 230b8e80941Smrg * clear color, we also pass the address to blorp, otherwise it will only 231b8e80941Smrg * stomp the CCS to a particular value and won't care about format or 232b8e80941Smrg * clear value 233b8e80941Smrg */ 234b8e80941Smrg if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { 235b8e80941Smrg const struct anv_address clear_color_addr = 236b8e80941Smrg anv_image_get_clear_color_addr(device, image, aspect); 237b8e80941Smrg blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); 238b8e80941Smrg } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT 239b8e80941Smrg && device->info.gen >= 10) { 240b8e80941Smrg /* Vulkan always clears to 1.0. On gen < 10, we set that directly in 241b8e80941Smrg * the state packet. For gen >= 10, must provide the clear value in a 242b8e80941Smrg * buffer. We have a single global buffer that stores the 1.0 value. 
243b8e80941Smrg */ 244b8e80941Smrg const struct anv_address clear_color_addr = (struct anv_address) { 245b8e80941Smrg .bo = (struct anv_bo *)&device->hiz_clear_bo 246b8e80941Smrg }; 247b8e80941Smrg blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); 248b8e80941Smrg } 249b8e80941Smrg } 250b8e80941Smrg} 251b8e80941Smrg 252b8e80941Smrgvoid anv_CmdCopyImage( 253b8e80941Smrg VkCommandBuffer commandBuffer, 254b8e80941Smrg VkImage srcImage, 255b8e80941Smrg VkImageLayout srcImageLayout, 256b8e80941Smrg VkImage dstImage, 257b8e80941Smrg VkImageLayout dstImageLayout, 258b8e80941Smrg uint32_t regionCount, 259b8e80941Smrg const VkImageCopy* pRegions) 260b8e80941Smrg{ 261b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 262b8e80941Smrg ANV_FROM_HANDLE(anv_image, src_image, srcImage); 263b8e80941Smrg ANV_FROM_HANDLE(anv_image, dst_image, dstImage); 264b8e80941Smrg 265b8e80941Smrg struct blorp_batch batch; 266b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 267b8e80941Smrg 268b8e80941Smrg for (unsigned r = 0; r < regionCount; r++) { 269b8e80941Smrg VkOffset3D srcOffset = 270b8e80941Smrg anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset); 271b8e80941Smrg VkOffset3D dstOffset = 272b8e80941Smrg anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset); 273b8e80941Smrg VkExtent3D extent = 274b8e80941Smrg anv_sanitize_image_extent(src_image->type, pRegions[r].extent); 275b8e80941Smrg 276b8e80941Smrg const uint32_t dst_level = pRegions[r].dstSubresource.mipLevel; 277b8e80941Smrg unsigned dst_base_layer, layer_count; 278b8e80941Smrg if (dst_image->type == VK_IMAGE_TYPE_3D) { 279b8e80941Smrg dst_base_layer = pRegions[r].dstOffset.z; 280b8e80941Smrg layer_count = pRegions[r].extent.depth; 281b8e80941Smrg } else { 282b8e80941Smrg dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer; 283b8e80941Smrg layer_count = 284b8e80941Smrg anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); 
285b8e80941Smrg } 286b8e80941Smrg 287b8e80941Smrg const uint32_t src_level = pRegions[r].srcSubresource.mipLevel; 288b8e80941Smrg unsigned src_base_layer; 289b8e80941Smrg if (src_image->type == VK_IMAGE_TYPE_3D) { 290b8e80941Smrg src_base_layer = pRegions[r].srcOffset.z; 291b8e80941Smrg } else { 292b8e80941Smrg src_base_layer = pRegions[r].srcSubresource.baseArrayLayer; 293b8e80941Smrg assert(layer_count == 294b8e80941Smrg anv_get_layerCount(src_image, &pRegions[r].srcSubresource)); 295b8e80941Smrg } 296b8e80941Smrg 297b8e80941Smrg VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask, 298b8e80941Smrg dst_mask = pRegions[r].dstSubresource.aspectMask; 299b8e80941Smrg 300b8e80941Smrg assert(anv_image_aspects_compatible(src_mask, dst_mask)); 301b8e80941Smrg 302b8e80941Smrg if (util_bitcount(src_mask) > 1) { 303b8e80941Smrg uint32_t aspect_bit; 304b8e80941Smrg anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) { 305b8e80941Smrg struct blorp_surf src_surf, dst_surf; 306b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 307b8e80941Smrg src_image, 1UL << aspect_bit, 308b8e80941Smrg srcImageLayout, ISL_AUX_USAGE_NONE, 309b8e80941Smrg &src_surf); 310b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 311b8e80941Smrg dst_image, 1UL << aspect_bit, 312b8e80941Smrg dstImageLayout, ISL_AUX_USAGE_NONE, 313b8e80941Smrg &dst_surf); 314b8e80941Smrg anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 315b8e80941Smrg 1UL << aspect_bit, 316b8e80941Smrg dst_surf.aux_usage, dst_level, 317b8e80941Smrg dst_base_layer, layer_count); 318b8e80941Smrg 319b8e80941Smrg for (unsigned i = 0; i < layer_count; i++) { 320b8e80941Smrg blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, 321b8e80941Smrg &dst_surf, dst_level, dst_base_layer + i, 322b8e80941Smrg srcOffset.x, srcOffset.y, 323b8e80941Smrg dstOffset.x, dstOffset.y, 324b8e80941Smrg extent.width, extent.height); 325b8e80941Smrg } 326b8e80941Smrg } 327b8e80941Smrg } else { 
328b8e80941Smrg struct blorp_surf src_surf, dst_surf; 329b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask, 330b8e80941Smrg srcImageLayout, ISL_AUX_USAGE_NONE, 331b8e80941Smrg &src_surf); 332b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask, 333b8e80941Smrg dstImageLayout, ISL_AUX_USAGE_NONE, 334b8e80941Smrg &dst_surf); 335b8e80941Smrg anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask, 336b8e80941Smrg dst_surf.aux_usage, dst_level, 337b8e80941Smrg dst_base_layer, layer_count); 338b8e80941Smrg 339b8e80941Smrg for (unsigned i = 0; i < layer_count; i++) { 340b8e80941Smrg blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, 341b8e80941Smrg &dst_surf, dst_level, dst_base_layer + i, 342b8e80941Smrg srcOffset.x, srcOffset.y, 343b8e80941Smrg dstOffset.x, dstOffset.y, 344b8e80941Smrg extent.width, extent.height); 345b8e80941Smrg } 346b8e80941Smrg } 347b8e80941Smrg } 348b8e80941Smrg 349b8e80941Smrg blorp_batch_finish(&batch); 350b8e80941Smrg} 351b8e80941Smrg 352b8e80941Smrgstatic void 353b8e80941Smrgcopy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, 354b8e80941Smrg struct anv_buffer *anv_buffer, 355b8e80941Smrg struct anv_image *anv_image, 356b8e80941Smrg VkImageLayout image_layout, 357b8e80941Smrg uint32_t regionCount, 358b8e80941Smrg const VkBufferImageCopy* pRegions, 359b8e80941Smrg bool buffer_to_image) 360b8e80941Smrg{ 361b8e80941Smrg struct blorp_batch batch; 362b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 363b8e80941Smrg 364b8e80941Smrg struct { 365b8e80941Smrg struct blorp_surf surf; 366b8e80941Smrg uint32_t level; 367b8e80941Smrg VkOffset3D offset; 368b8e80941Smrg } image, buffer, *src, *dst; 369b8e80941Smrg 370b8e80941Smrg buffer.level = 0; 371b8e80941Smrg buffer.offset = (VkOffset3D) { 0, 0, 0 }; 372b8e80941Smrg 373b8e80941Smrg if (buffer_to_image) { 374b8e80941Smrg src = &buffer; 375b8e80941Smrg dst = ℑ 376b8e80941Smrg } else { 
377b8e80941Smrg src = ℑ 378b8e80941Smrg dst = &buffer; 379b8e80941Smrg } 380b8e80941Smrg 381b8e80941Smrg for (unsigned r = 0; r < regionCount; r++) { 382b8e80941Smrg const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; 383b8e80941Smrg 384b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect, 385b8e80941Smrg image_layout, ISL_AUX_USAGE_NONE, 386b8e80941Smrg &image.surf); 387b8e80941Smrg image.offset = 388b8e80941Smrg anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset); 389b8e80941Smrg image.level = pRegions[r].imageSubresource.mipLevel; 390b8e80941Smrg 391b8e80941Smrg VkExtent3D extent = 392b8e80941Smrg anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent); 393b8e80941Smrg if (anv_image->type != VK_IMAGE_TYPE_3D) { 394b8e80941Smrg image.offset.z = pRegions[r].imageSubresource.baseArrayLayer; 395b8e80941Smrg extent.depth = 396b8e80941Smrg anv_get_layerCount(anv_image, &pRegions[r].imageSubresource); 397b8e80941Smrg } 398b8e80941Smrg 399b8e80941Smrg const enum isl_format buffer_format = 400b8e80941Smrg anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format, 401b8e80941Smrg aspect, VK_IMAGE_TILING_LINEAR); 402b8e80941Smrg 403b8e80941Smrg const VkExtent3D bufferImageExtent = { 404b8e80941Smrg .width = pRegions[r].bufferRowLength ? 405b8e80941Smrg pRegions[r].bufferRowLength : extent.width, 406b8e80941Smrg .height = pRegions[r].bufferImageHeight ? 
407b8e80941Smrg pRegions[r].bufferImageHeight : extent.height, 408b8e80941Smrg }; 409b8e80941Smrg 410b8e80941Smrg const struct isl_format_layout *buffer_fmtl = 411b8e80941Smrg isl_format_get_layout(buffer_format); 412b8e80941Smrg 413b8e80941Smrg const uint32_t buffer_row_pitch = 414b8e80941Smrg DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) * 415b8e80941Smrg (buffer_fmtl->bpb / 8); 416b8e80941Smrg 417b8e80941Smrg const uint32_t buffer_layer_stride = 418b8e80941Smrg DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) * 419b8e80941Smrg buffer_row_pitch; 420b8e80941Smrg 421b8e80941Smrg struct isl_surf buffer_isl_surf; 422b8e80941Smrg get_blorp_surf_for_anv_buffer(cmd_buffer->device, 423b8e80941Smrg anv_buffer, pRegions[r].bufferOffset, 424b8e80941Smrg extent.width, extent.height, 425b8e80941Smrg buffer_row_pitch, buffer_format, 426b8e80941Smrg &buffer.surf, &buffer_isl_surf); 427b8e80941Smrg 428b8e80941Smrg if (&image == dst) { 429b8e80941Smrg anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image, 430b8e80941Smrg aspect, dst->surf.aux_usage, 431b8e80941Smrg dst->level, 432b8e80941Smrg dst->offset.z, extent.depth); 433b8e80941Smrg } 434b8e80941Smrg 435b8e80941Smrg for (unsigned z = 0; z < extent.depth; z++) { 436b8e80941Smrg blorp_copy(&batch, &src->surf, src->level, src->offset.z, 437b8e80941Smrg &dst->surf, dst->level, dst->offset.z, 438b8e80941Smrg src->offset.x, src->offset.y, dst->offset.x, dst->offset.y, 439b8e80941Smrg extent.width, extent.height); 440b8e80941Smrg 441b8e80941Smrg image.offset.z++; 442b8e80941Smrg buffer.surf.addr.offset += buffer_layer_stride; 443b8e80941Smrg } 444b8e80941Smrg } 445b8e80941Smrg 446b8e80941Smrg blorp_batch_finish(&batch); 447b8e80941Smrg} 448b8e80941Smrg 449b8e80941Smrgvoid anv_CmdCopyBufferToImage( 450b8e80941Smrg VkCommandBuffer commandBuffer, 451b8e80941Smrg VkBuffer srcBuffer, 452b8e80941Smrg VkImage dstImage, 453b8e80941Smrg VkImageLayout dstImageLayout, 454b8e80941Smrg uint32_t regionCount, 455b8e80941Smrg 
const VkBufferImageCopy* pRegions) 456b8e80941Smrg{ 457b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 458b8e80941Smrg ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); 459b8e80941Smrg ANV_FROM_HANDLE(anv_image, dst_image, dstImage); 460b8e80941Smrg 461b8e80941Smrg copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, dstImageLayout, 462b8e80941Smrg regionCount, pRegions, true); 463b8e80941Smrg} 464b8e80941Smrg 465b8e80941Smrgvoid anv_CmdCopyImageToBuffer( 466b8e80941Smrg VkCommandBuffer commandBuffer, 467b8e80941Smrg VkImage srcImage, 468b8e80941Smrg VkImageLayout srcImageLayout, 469b8e80941Smrg VkBuffer dstBuffer, 470b8e80941Smrg uint32_t regionCount, 471b8e80941Smrg const VkBufferImageCopy* pRegions) 472b8e80941Smrg{ 473b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 474b8e80941Smrg ANV_FROM_HANDLE(anv_image, src_image, srcImage); 475b8e80941Smrg ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); 476b8e80941Smrg 477b8e80941Smrg copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, srcImageLayout, 478b8e80941Smrg regionCount, pRegions, false); 479b8e80941Smrg 480b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 481b8e80941Smrg} 482b8e80941Smrg 483b8e80941Smrgstatic bool 484b8e80941Smrgflip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1) 485b8e80941Smrg{ 486b8e80941Smrg bool flip = false; 487b8e80941Smrg if (*src0 > *src1) { 488b8e80941Smrg unsigned tmp = *src0; 489b8e80941Smrg *src0 = *src1; 490b8e80941Smrg *src1 = tmp; 491b8e80941Smrg flip = !flip; 492b8e80941Smrg } 493b8e80941Smrg 494b8e80941Smrg if (*dst0 > *dst1) { 495b8e80941Smrg unsigned tmp = *dst0; 496b8e80941Smrg *dst0 = *dst1; 497b8e80941Smrg *dst1 = tmp; 498b8e80941Smrg flip = !flip; 499b8e80941Smrg } 500b8e80941Smrg 501b8e80941Smrg return flip; 502b8e80941Smrg} 503b8e80941Smrg 504b8e80941Smrgvoid anv_CmdBlitImage( 505b8e80941Smrg VkCommandBuffer commandBuffer, 506b8e80941Smrg VkImage 
srcImage, 507b8e80941Smrg VkImageLayout srcImageLayout, 508b8e80941Smrg VkImage dstImage, 509b8e80941Smrg VkImageLayout dstImageLayout, 510b8e80941Smrg uint32_t regionCount, 511b8e80941Smrg const VkImageBlit* pRegions, 512b8e80941Smrg VkFilter filter) 513b8e80941Smrg 514b8e80941Smrg{ 515b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 516b8e80941Smrg ANV_FROM_HANDLE(anv_image, src_image, srcImage); 517b8e80941Smrg ANV_FROM_HANDLE(anv_image, dst_image, dstImage); 518b8e80941Smrg 519b8e80941Smrg struct blorp_surf src, dst; 520b8e80941Smrg 521b8e80941Smrg enum blorp_filter blorp_filter; 522b8e80941Smrg switch (filter) { 523b8e80941Smrg case VK_FILTER_NEAREST: 524b8e80941Smrg blorp_filter = BLORP_FILTER_NEAREST; 525b8e80941Smrg break; 526b8e80941Smrg case VK_FILTER_LINEAR: 527b8e80941Smrg blorp_filter = BLORP_FILTER_BILINEAR; 528b8e80941Smrg break; 529b8e80941Smrg default: 530b8e80941Smrg unreachable("Invalid filter"); 531b8e80941Smrg } 532b8e80941Smrg 533b8e80941Smrg struct blorp_batch batch; 534b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 535b8e80941Smrg 536b8e80941Smrg for (unsigned r = 0; r < regionCount; r++) { 537b8e80941Smrg const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource; 538b8e80941Smrg const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource; 539b8e80941Smrg 540b8e80941Smrg assert(anv_image_aspects_compatible(src_res->aspectMask, 541b8e80941Smrg dst_res->aspectMask)); 542b8e80941Smrg 543b8e80941Smrg uint32_t aspect_bit; 544b8e80941Smrg anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) { 545b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 546b8e80941Smrg src_image, 1U << aspect_bit, 547b8e80941Smrg srcImageLayout, ISL_AUX_USAGE_NONE, &src); 548b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 549b8e80941Smrg dst_image, 1U << aspect_bit, 550b8e80941Smrg dstImageLayout, ISL_AUX_USAGE_NONE, &dst); 551b8e80941Smrg 
552b8e80941Smrg struct anv_format_plane src_format = 553b8e80941Smrg anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format, 554b8e80941Smrg 1U << aspect_bit, src_image->tiling); 555b8e80941Smrg struct anv_format_plane dst_format = 556b8e80941Smrg anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format, 557b8e80941Smrg 1U << aspect_bit, dst_image->tiling); 558b8e80941Smrg 559b8e80941Smrg unsigned dst_start, dst_end; 560b8e80941Smrg if (dst_image->type == VK_IMAGE_TYPE_3D) { 561b8e80941Smrg assert(dst_res->baseArrayLayer == 0); 562b8e80941Smrg dst_start = pRegions[r].dstOffsets[0].z; 563b8e80941Smrg dst_end = pRegions[r].dstOffsets[1].z; 564b8e80941Smrg } else { 565b8e80941Smrg dst_start = dst_res->baseArrayLayer; 566b8e80941Smrg dst_end = dst_start + anv_get_layerCount(dst_image, dst_res); 567b8e80941Smrg } 568b8e80941Smrg 569b8e80941Smrg unsigned src_start, src_end; 570b8e80941Smrg if (src_image->type == VK_IMAGE_TYPE_3D) { 571b8e80941Smrg assert(src_res->baseArrayLayer == 0); 572b8e80941Smrg src_start = pRegions[r].srcOffsets[0].z; 573b8e80941Smrg src_end = pRegions[r].srcOffsets[1].z; 574b8e80941Smrg } else { 575b8e80941Smrg src_start = src_res->baseArrayLayer; 576b8e80941Smrg src_end = src_start + anv_get_layerCount(src_image, src_res); 577b8e80941Smrg } 578b8e80941Smrg 579b8e80941Smrg bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); 580b8e80941Smrg float src_z_step = (float)(src_end + 1 - src_start) / 581b8e80941Smrg (float)(dst_end + 1 - dst_start); 582b8e80941Smrg 583b8e80941Smrg if (flip_z) { 584b8e80941Smrg src_start = src_end; 585b8e80941Smrg src_z_step *= -1; 586b8e80941Smrg } 587b8e80941Smrg 588b8e80941Smrg unsigned src_x0 = pRegions[r].srcOffsets[0].x; 589b8e80941Smrg unsigned src_x1 = pRegions[r].srcOffsets[1].x; 590b8e80941Smrg unsigned dst_x0 = pRegions[r].dstOffsets[0].x; 591b8e80941Smrg unsigned dst_x1 = pRegions[r].dstOffsets[1].x; 592b8e80941Smrg bool flip_x = flip_coords(&src_x0, &src_x1, 
&dst_x0, &dst_x1); 593b8e80941Smrg 594b8e80941Smrg unsigned src_y0 = pRegions[r].srcOffsets[0].y; 595b8e80941Smrg unsigned src_y1 = pRegions[r].srcOffsets[1].y; 596b8e80941Smrg unsigned dst_y0 = pRegions[r].dstOffsets[0].y; 597b8e80941Smrg unsigned dst_y1 = pRegions[r].dstOffsets[1].y; 598b8e80941Smrg bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1); 599b8e80941Smrg 600b8e80941Smrg const unsigned num_layers = dst_end - dst_start; 601b8e80941Smrg anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 602b8e80941Smrg 1U << aspect_bit, 603b8e80941Smrg dst.aux_usage, 604b8e80941Smrg dst_res->mipLevel, 605b8e80941Smrg dst_start, num_layers); 606b8e80941Smrg 607b8e80941Smrg for (unsigned i = 0; i < num_layers; i++) { 608b8e80941Smrg unsigned dst_z = dst_start + i; 609b8e80941Smrg unsigned src_z = src_start + i * src_z_step; 610b8e80941Smrg 611b8e80941Smrg blorp_blit(&batch, &src, src_res->mipLevel, src_z, 612b8e80941Smrg src_format.isl_format, src_format.swizzle, 613b8e80941Smrg &dst, dst_res->mipLevel, dst_z, 614b8e80941Smrg dst_format.isl_format, dst_format.swizzle, 615b8e80941Smrg src_x0, src_y0, src_x1, src_y1, 616b8e80941Smrg dst_x0, dst_y0, dst_x1, dst_y1, 617b8e80941Smrg blorp_filter, flip_x, flip_y); 618b8e80941Smrg } 619b8e80941Smrg } 620b8e80941Smrg } 621b8e80941Smrg 622b8e80941Smrg blorp_batch_finish(&batch); 623b8e80941Smrg} 624b8e80941Smrg 625b8e80941Smrgstatic enum isl_format 626b8e80941Smrgisl_format_for_size(unsigned size_B) 627b8e80941Smrg{ 628b8e80941Smrg switch (size_B) { 629b8e80941Smrg case 4: return ISL_FORMAT_R32_UINT; 630b8e80941Smrg case 8: return ISL_FORMAT_R32G32_UINT; 631b8e80941Smrg case 16: return ISL_FORMAT_R32G32B32A32_UINT; 632b8e80941Smrg default: 633b8e80941Smrg unreachable("Not a power-of-two format size"); 634b8e80941Smrg } 635b8e80941Smrg} 636b8e80941Smrg 637b8e80941Smrg/** 638b8e80941Smrg * Returns the greatest common divisor of a and b that is a power of two. 
639b8e80941Smrg */ 640b8e80941Smrgstatic uint64_t 641b8e80941Smrggcd_pow2_u64(uint64_t a, uint64_t b) 642b8e80941Smrg{ 643b8e80941Smrg assert(a > 0 || b > 0); 644b8e80941Smrg 645b8e80941Smrg unsigned a_log2 = ffsll(a) - 1; 646b8e80941Smrg unsigned b_log2 = ffsll(b) - 1; 647b8e80941Smrg 648b8e80941Smrg /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which 649b8e80941Smrg * case, the MIN2() will take the other one. If both are 0 then we will 650b8e80941Smrg * hit the assert above. 651b8e80941Smrg */ 652b8e80941Smrg return 1 << MIN2(a_log2, b_log2); 653b8e80941Smrg} 654b8e80941Smrg 655b8e80941Smrg/* This is maximum possible width/height our HW can handle */ 656b8e80941Smrg#define MAX_SURFACE_DIM (1ull << 14) 657b8e80941Smrg 658b8e80941Smrgvoid anv_CmdCopyBuffer( 659b8e80941Smrg VkCommandBuffer commandBuffer, 660b8e80941Smrg VkBuffer srcBuffer, 661b8e80941Smrg VkBuffer dstBuffer, 662b8e80941Smrg uint32_t regionCount, 663b8e80941Smrg const VkBufferCopy* pRegions) 664b8e80941Smrg{ 665b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 666b8e80941Smrg ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); 667b8e80941Smrg ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); 668b8e80941Smrg 669b8e80941Smrg struct blorp_batch batch; 670b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 671b8e80941Smrg 672b8e80941Smrg for (unsigned r = 0; r < regionCount; r++) { 673b8e80941Smrg struct blorp_address src = { 674b8e80941Smrg .buffer = src_buffer->address.bo, 675b8e80941Smrg .offset = src_buffer->address.offset + pRegions[r].srcOffset, 676b8e80941Smrg .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo), 677b8e80941Smrg }; 678b8e80941Smrg struct blorp_address dst = { 679b8e80941Smrg .buffer = dst_buffer->address.bo, 680b8e80941Smrg .offset = dst_buffer->address.offset + pRegions[r].dstOffset, 681b8e80941Smrg .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo), 682b8e80941Smrg }; 
683b8e80941Smrg 684b8e80941Smrg blorp_buffer_copy(&batch, src, dst, pRegions[r].size); 685b8e80941Smrg } 686b8e80941Smrg 687b8e80941Smrg blorp_batch_finish(&batch); 688b8e80941Smrg 689b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 690b8e80941Smrg} 691b8e80941Smrg 692b8e80941Smrgvoid anv_CmdUpdateBuffer( 693b8e80941Smrg VkCommandBuffer commandBuffer, 694b8e80941Smrg VkBuffer dstBuffer, 695b8e80941Smrg VkDeviceSize dstOffset, 696b8e80941Smrg VkDeviceSize dataSize, 697b8e80941Smrg const void* pData) 698b8e80941Smrg{ 699b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 700b8e80941Smrg ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); 701b8e80941Smrg 702b8e80941Smrg struct blorp_batch batch; 703b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 704b8e80941Smrg 705b8e80941Smrg /* We can't quite grab a full block because the state stream needs a 706b8e80941Smrg * little data at the top to build its linked list. 707b8e80941Smrg */ 708b8e80941Smrg const uint32_t max_update_size = 709b8e80941Smrg cmd_buffer->device->dynamic_state_pool.block_size - 64; 710b8e80941Smrg 711b8e80941Smrg assert(max_update_size < MAX_SURFACE_DIM * 4); 712b8e80941Smrg 713b8e80941Smrg /* We're about to read data that was written from the CPU. Flush the 714b8e80941Smrg * texture cache so we don't get anything stale. 
715b8e80941Smrg */ 716b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; 717b8e80941Smrg 718b8e80941Smrg while (dataSize) { 719b8e80941Smrg const uint32_t copy_size = MIN2(dataSize, max_update_size); 720b8e80941Smrg 721b8e80941Smrg struct anv_state tmp_data = 722b8e80941Smrg anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); 723b8e80941Smrg 724b8e80941Smrg memcpy(tmp_data.map, pData, copy_size); 725b8e80941Smrg 726b8e80941Smrg struct blorp_address src = { 727b8e80941Smrg .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, 728b8e80941Smrg .offset = tmp_data.offset, 729b8e80941Smrg .mocs = cmd_buffer->device->default_mocs, 730b8e80941Smrg }; 731b8e80941Smrg struct blorp_address dst = { 732b8e80941Smrg .buffer = dst_buffer->address.bo, 733b8e80941Smrg .offset = dst_buffer->address.offset + dstOffset, 734b8e80941Smrg .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo), 735b8e80941Smrg }; 736b8e80941Smrg 737b8e80941Smrg blorp_buffer_copy(&batch, src, dst, copy_size); 738b8e80941Smrg 739b8e80941Smrg dataSize -= copy_size; 740b8e80941Smrg dstOffset += copy_size; 741b8e80941Smrg pData = (void *)pData + copy_size; 742b8e80941Smrg } 743b8e80941Smrg 744b8e80941Smrg blorp_batch_finish(&batch); 745b8e80941Smrg 746b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 747b8e80941Smrg} 748b8e80941Smrg 749b8e80941Smrgvoid anv_CmdFillBuffer( 750b8e80941Smrg VkCommandBuffer commandBuffer, 751b8e80941Smrg VkBuffer dstBuffer, 752b8e80941Smrg VkDeviceSize dstOffset, 753b8e80941Smrg VkDeviceSize fillSize, 754b8e80941Smrg uint32_t data) 755b8e80941Smrg{ 756b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 757b8e80941Smrg ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); 758b8e80941Smrg struct blorp_surf surf; 759b8e80941Smrg struct isl_surf isl_surf; 760b8e80941Smrg 761b8e80941Smrg struct blorp_batch batch; 762b8e80941Smrg 
blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 763b8e80941Smrg 764b8e80941Smrg fillSize = anv_buffer_get_range(dst_buffer, dstOffset, fillSize); 765b8e80941Smrg 766b8e80941Smrg /* From the Vulkan spec: 767b8e80941Smrg * 768b8e80941Smrg * "size is the number of bytes to fill, and must be either a multiple 769b8e80941Smrg * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of 770b8e80941Smrg * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the 771b8e80941Smrg * buffer is not a multiple of 4, then the nearest smaller multiple is 772b8e80941Smrg * used." 773b8e80941Smrg */ 774b8e80941Smrg fillSize &= ~3ull; 775b8e80941Smrg 776b8e80941Smrg /* First, we compute the biggest format that can be used with the 777b8e80941Smrg * given offsets and size. 778b8e80941Smrg */ 779b8e80941Smrg int bs = 16; 780b8e80941Smrg bs = gcd_pow2_u64(bs, dstOffset); 781b8e80941Smrg bs = gcd_pow2_u64(bs, fillSize); 782b8e80941Smrg enum isl_format isl_format = isl_format_for_size(bs); 783b8e80941Smrg 784b8e80941Smrg union isl_color_value color = { 785b8e80941Smrg .u32 = { data, data, data, data }, 786b8e80941Smrg }; 787b8e80941Smrg 788b8e80941Smrg const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; 789b8e80941Smrg while (fillSize >= max_fill_size) { 790b8e80941Smrg get_blorp_surf_for_anv_buffer(cmd_buffer->device, 791b8e80941Smrg dst_buffer, dstOffset, 792b8e80941Smrg MAX_SURFACE_DIM, MAX_SURFACE_DIM, 793b8e80941Smrg MAX_SURFACE_DIM * bs, isl_format, 794b8e80941Smrg &surf, &isl_surf); 795b8e80941Smrg 796b8e80941Smrg blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, 797b8e80941Smrg 0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM, 798b8e80941Smrg color, NULL); 799b8e80941Smrg fillSize -= max_fill_size; 800b8e80941Smrg dstOffset += max_fill_size; 801b8e80941Smrg } 802b8e80941Smrg 803b8e80941Smrg uint64_t height = fillSize / (MAX_SURFACE_DIM * bs); 804b8e80941Smrg assert(height < MAX_SURFACE_DIM); 805b8e80941Smrg if 
(height != 0) { 806b8e80941Smrg const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs; 807b8e80941Smrg get_blorp_surf_for_anv_buffer(cmd_buffer->device, 808b8e80941Smrg dst_buffer, dstOffset, 809b8e80941Smrg MAX_SURFACE_DIM, height, 810b8e80941Smrg MAX_SURFACE_DIM * bs, isl_format, 811b8e80941Smrg &surf, &isl_surf); 812b8e80941Smrg 813b8e80941Smrg blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, 814b8e80941Smrg 0, 0, 1, 0, 0, MAX_SURFACE_DIM, height, 815b8e80941Smrg color, NULL); 816b8e80941Smrg fillSize -= rect_fill_size; 817b8e80941Smrg dstOffset += rect_fill_size; 818b8e80941Smrg } 819b8e80941Smrg 820b8e80941Smrg if (fillSize != 0) { 821b8e80941Smrg const uint32_t width = fillSize / bs; 822b8e80941Smrg get_blorp_surf_for_anv_buffer(cmd_buffer->device, 823b8e80941Smrg dst_buffer, dstOffset, 824b8e80941Smrg width, 1, 825b8e80941Smrg width * bs, isl_format, 826b8e80941Smrg &surf, &isl_surf); 827b8e80941Smrg 828b8e80941Smrg blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, 829b8e80941Smrg 0, 0, 1, 0, 0, width, 1, 830b8e80941Smrg color, NULL); 831b8e80941Smrg } 832b8e80941Smrg 833b8e80941Smrg blorp_batch_finish(&batch); 834b8e80941Smrg 835b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; 836b8e80941Smrg} 837b8e80941Smrg 838b8e80941Smrgvoid anv_CmdClearColorImage( 839b8e80941Smrg VkCommandBuffer commandBuffer, 840b8e80941Smrg VkImage _image, 841b8e80941Smrg VkImageLayout imageLayout, 842b8e80941Smrg const VkClearColorValue* pColor, 843b8e80941Smrg uint32_t rangeCount, 844b8e80941Smrg const VkImageSubresourceRange* pRanges) 845b8e80941Smrg{ 846b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 847b8e80941Smrg ANV_FROM_HANDLE(anv_image, image, _image); 848b8e80941Smrg 849b8e80941Smrg static const bool color_write_disable[4] = { false, false, false, false }; 850b8e80941Smrg 851b8e80941Smrg struct blorp_batch batch; 852b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, 
&batch, cmd_buffer, 0); 853b8e80941Smrg 854b8e80941Smrg 855b8e80941Smrg for (unsigned r = 0; r < rangeCount; r++) { 856b8e80941Smrg if (pRanges[r].aspectMask == 0) 857b8e80941Smrg continue; 858b8e80941Smrg 859b8e80941Smrg assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); 860b8e80941Smrg 861b8e80941Smrg struct blorp_surf surf; 862b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 863b8e80941Smrg image, pRanges[r].aspectMask, 864b8e80941Smrg imageLayout, ISL_AUX_USAGE_NONE, &surf); 865b8e80941Smrg 866b8e80941Smrg struct anv_format_plane src_format = 867b8e80941Smrg anv_get_format_plane(&cmd_buffer->device->info, image->vk_format, 868b8e80941Smrg VK_IMAGE_ASPECT_COLOR_BIT, image->tiling); 869b8e80941Smrg 870b8e80941Smrg unsigned base_layer = pRanges[r].baseArrayLayer; 871b8e80941Smrg unsigned layer_count = anv_get_layerCount(image, &pRanges[r]); 872b8e80941Smrg 873b8e80941Smrg for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) { 874b8e80941Smrg const unsigned level = pRanges[r].baseMipLevel + i; 875b8e80941Smrg const unsigned level_width = anv_minify(image->extent.width, level); 876b8e80941Smrg const unsigned level_height = anv_minify(image->extent.height, level); 877b8e80941Smrg 878b8e80941Smrg if (image->type == VK_IMAGE_TYPE_3D) { 879b8e80941Smrg base_layer = 0; 880b8e80941Smrg layer_count = anv_minify(image->extent.depth, level); 881b8e80941Smrg } 882b8e80941Smrg 883b8e80941Smrg anv_cmd_buffer_mark_image_written(cmd_buffer, image, 884b8e80941Smrg pRanges[r].aspectMask, 885b8e80941Smrg surf.aux_usage, level, 886b8e80941Smrg base_layer, layer_count); 887b8e80941Smrg 888b8e80941Smrg blorp_clear(&batch, &surf, 889b8e80941Smrg src_format.isl_format, src_format.swizzle, 890b8e80941Smrg level, base_layer, layer_count, 891b8e80941Smrg 0, 0, level_width, level_height, 892b8e80941Smrg vk_to_isl_color(*pColor), color_write_disable); 893b8e80941Smrg } 894b8e80941Smrg } 895b8e80941Smrg 896b8e80941Smrg blorp_batch_finish(&batch); 
897b8e80941Smrg} 898b8e80941Smrg 899b8e80941Smrgvoid anv_CmdClearDepthStencilImage( 900b8e80941Smrg VkCommandBuffer commandBuffer, 901b8e80941Smrg VkImage image_h, 902b8e80941Smrg VkImageLayout imageLayout, 903b8e80941Smrg const VkClearDepthStencilValue* pDepthStencil, 904b8e80941Smrg uint32_t rangeCount, 905b8e80941Smrg const VkImageSubresourceRange* pRanges) 906b8e80941Smrg{ 907b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 908b8e80941Smrg ANV_FROM_HANDLE(anv_image, image, image_h); 909b8e80941Smrg 910b8e80941Smrg struct blorp_batch batch; 911b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 912b8e80941Smrg 913b8e80941Smrg struct blorp_surf depth, stencil; 914b8e80941Smrg if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { 915b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 916b8e80941Smrg image, VK_IMAGE_ASPECT_DEPTH_BIT, 917b8e80941Smrg imageLayout, ISL_AUX_USAGE_NONE, &depth); 918b8e80941Smrg } else { 919b8e80941Smrg memset(&depth, 0, sizeof(depth)); 920b8e80941Smrg } 921b8e80941Smrg 922b8e80941Smrg if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 923b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 924b8e80941Smrg image, VK_IMAGE_ASPECT_STENCIL_BIT, 925b8e80941Smrg imageLayout, ISL_AUX_USAGE_NONE, &stencil); 926b8e80941Smrg } else { 927b8e80941Smrg memset(&stencil, 0, sizeof(stencil)); 928b8e80941Smrg } 929b8e80941Smrg 930b8e80941Smrg for (unsigned r = 0; r < rangeCount; r++) { 931b8e80941Smrg if (pRanges[r].aspectMask == 0) 932b8e80941Smrg continue; 933b8e80941Smrg 934b8e80941Smrg bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT; 935b8e80941Smrg bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT; 936b8e80941Smrg 937b8e80941Smrg unsigned base_layer = pRanges[r].baseArrayLayer; 938b8e80941Smrg unsigned layer_count = anv_get_layerCount(image, &pRanges[r]); 939b8e80941Smrg 940b8e80941Smrg for (unsigned i = 0; i < anv_get_levelCount(image, 
&pRanges[r]); i++) { 941b8e80941Smrg const unsigned level = pRanges[r].baseMipLevel + i; 942b8e80941Smrg const unsigned level_width = anv_minify(image->extent.width, level); 943b8e80941Smrg const unsigned level_height = anv_minify(image->extent.height, level); 944b8e80941Smrg 945b8e80941Smrg if (image->type == VK_IMAGE_TYPE_3D) 946b8e80941Smrg layer_count = anv_minify(image->extent.depth, level); 947b8e80941Smrg 948b8e80941Smrg blorp_clear_depth_stencil(&batch, &depth, &stencil, 949b8e80941Smrg level, base_layer, layer_count, 950b8e80941Smrg 0, 0, level_width, level_height, 951b8e80941Smrg clear_depth, pDepthStencil->depth, 952b8e80941Smrg clear_stencil ? 0xff : 0, 953b8e80941Smrg pDepthStencil->stencil); 954b8e80941Smrg } 955b8e80941Smrg } 956b8e80941Smrg 957b8e80941Smrg blorp_batch_finish(&batch); 958b8e80941Smrg} 959b8e80941Smrg 960b8e80941SmrgVkResult 961b8e80941Smrganv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer, 962b8e80941Smrg uint32_t num_entries, 963b8e80941Smrg uint32_t *state_offset, 964b8e80941Smrg struct anv_state *bt_state) 965b8e80941Smrg{ 966b8e80941Smrg *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries, 967b8e80941Smrg state_offset); 968b8e80941Smrg if (bt_state->map == NULL) { 969b8e80941Smrg /* We ran out of space. Grab a new binding table block. */ 970b8e80941Smrg VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); 971b8e80941Smrg if (result != VK_SUCCESS) 972b8e80941Smrg return result; 973b8e80941Smrg 974b8e80941Smrg /* Re-emit state base addresses so we get the new surface state base 975b8e80941Smrg * address before we start emitting binding tables etc. 
976b8e80941Smrg */ 977b8e80941Smrg anv_cmd_buffer_emit_state_base_address(cmd_buffer); 978b8e80941Smrg 979b8e80941Smrg *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries, 980b8e80941Smrg state_offset); 981b8e80941Smrg assert(bt_state->map != NULL); 982b8e80941Smrg } 983b8e80941Smrg 984b8e80941Smrg return VK_SUCCESS; 985b8e80941Smrg} 986b8e80941Smrg 987b8e80941Smrgstatic VkResult 988b8e80941Smrgbinding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer, 989b8e80941Smrg struct anv_state surface_state, 990b8e80941Smrg uint32_t *bt_offset) 991b8e80941Smrg{ 992b8e80941Smrg uint32_t state_offset; 993b8e80941Smrg struct anv_state bt_state; 994b8e80941Smrg 995b8e80941Smrg VkResult result = 996b8e80941Smrg anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset, 997b8e80941Smrg &bt_state); 998b8e80941Smrg if (result != VK_SUCCESS) 999b8e80941Smrg return result; 1000b8e80941Smrg 1001b8e80941Smrg uint32_t *bt_map = bt_state.map; 1002b8e80941Smrg bt_map[0] = surface_state.offset + state_offset; 1003b8e80941Smrg 1004b8e80941Smrg *bt_offset = bt_state.offset; 1005b8e80941Smrg return VK_SUCCESS; 1006b8e80941Smrg} 1007b8e80941Smrg 1008b8e80941Smrgstatic void 1009b8e80941Smrgclear_color_attachment(struct anv_cmd_buffer *cmd_buffer, 1010b8e80941Smrg struct blorp_batch *batch, 1011b8e80941Smrg const VkClearAttachment *attachment, 1012b8e80941Smrg uint32_t rectCount, const VkClearRect *pRects) 1013b8e80941Smrg{ 1014b8e80941Smrg const struct anv_subpass *subpass = cmd_buffer->state.subpass; 1015b8e80941Smrg const uint32_t color_att = attachment->colorAttachment; 1016b8e80941Smrg assert(color_att < subpass->color_count); 1017b8e80941Smrg const uint32_t att_idx = subpass->color_attachments[color_att].attachment; 1018b8e80941Smrg 1019b8e80941Smrg if (att_idx == VK_ATTACHMENT_UNUSED) 1020b8e80941Smrg return; 1021b8e80941Smrg 1022b8e80941Smrg struct anv_render_pass_attachment *pass_att = 1023b8e80941Smrg &cmd_buffer->state.pass->attachments[att_idx]; 
1024b8e80941Smrg struct anv_attachment_state *att_state = 1025b8e80941Smrg &cmd_buffer->state.attachments[att_idx]; 1026b8e80941Smrg 1027b8e80941Smrg uint32_t binding_table; 1028b8e80941Smrg VkResult result = 1029b8e80941Smrg binding_table_for_surface_state(cmd_buffer, att_state->color.state, 1030b8e80941Smrg &binding_table); 1031b8e80941Smrg if (result != VK_SUCCESS) 1032b8e80941Smrg return; 1033b8e80941Smrg 1034b8e80941Smrg union isl_color_value clear_color = 1035b8e80941Smrg vk_to_isl_color(attachment->clearValue.color); 1036b8e80941Smrg 1037b8e80941Smrg /* If multiview is enabled we ignore baseArrayLayer and layerCount */ 1038b8e80941Smrg if (subpass->view_mask) { 1039b8e80941Smrg uint32_t view_idx; 1040b8e80941Smrg for_each_bit(view_idx, subpass->view_mask) { 1041b8e80941Smrg for (uint32_t r = 0; r < rectCount; ++r) { 1042b8e80941Smrg const VkOffset2D offset = pRects[r].rect.offset; 1043b8e80941Smrg const VkExtent2D extent = pRects[r].rect.extent; 1044b8e80941Smrg blorp_clear_attachments(batch, binding_table, 1045b8e80941Smrg ISL_FORMAT_UNSUPPORTED, pass_att->samples, 1046b8e80941Smrg view_idx, 1, 1047b8e80941Smrg offset.x, offset.y, 1048b8e80941Smrg offset.x + extent.width, 1049b8e80941Smrg offset.y + extent.height, 1050b8e80941Smrg true, clear_color, false, 0.0f, 0, 0); 1051b8e80941Smrg } 1052b8e80941Smrg } 1053b8e80941Smrg return; 1054b8e80941Smrg } 1055b8e80941Smrg 1056b8e80941Smrg for (uint32_t r = 0; r < rectCount; ++r) { 1057b8e80941Smrg const VkOffset2D offset = pRects[r].rect.offset; 1058b8e80941Smrg const VkExtent2D extent = pRects[r].rect.extent; 1059b8e80941Smrg assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS); 1060b8e80941Smrg blorp_clear_attachments(batch, binding_table, 1061b8e80941Smrg ISL_FORMAT_UNSUPPORTED, pass_att->samples, 1062b8e80941Smrg pRects[r].baseArrayLayer, 1063b8e80941Smrg pRects[r].layerCount, 1064b8e80941Smrg offset.x, offset.y, 1065b8e80941Smrg offset.x + extent.width, offset.y + extent.height, 1066b8e80941Smrg true, 
clear_color, false, 0.0f, 0, 0); 1067b8e80941Smrg } 1068b8e80941Smrg} 1069b8e80941Smrg 1070b8e80941Smrgstatic void 1071b8e80941Smrgclear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer, 1072b8e80941Smrg struct blorp_batch *batch, 1073b8e80941Smrg const VkClearAttachment *attachment, 1074b8e80941Smrg uint32_t rectCount, const VkClearRect *pRects) 1075b8e80941Smrg{ 1076b8e80941Smrg static const union isl_color_value color_value = { .u32 = { 0, } }; 1077b8e80941Smrg const struct anv_subpass *subpass = cmd_buffer->state.subpass; 1078b8e80941Smrg if (!subpass->depth_stencil_attachment) 1079b8e80941Smrg return; 1080b8e80941Smrg 1081b8e80941Smrg const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; 1082b8e80941Smrg assert(att_idx != VK_ATTACHMENT_UNUSED); 1083b8e80941Smrg struct anv_render_pass_attachment *pass_att = 1084b8e80941Smrg &cmd_buffer->state.pass->attachments[att_idx]; 1085b8e80941Smrg 1086b8e80941Smrg bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT; 1087b8e80941Smrg bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT; 1088b8e80941Smrg 1089b8e80941Smrg enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED; 1090b8e80941Smrg if (clear_depth) { 1091b8e80941Smrg depth_format = anv_get_isl_format(&cmd_buffer->device->info, 1092b8e80941Smrg pass_att->format, 1093b8e80941Smrg VK_IMAGE_ASPECT_DEPTH_BIT, 1094b8e80941Smrg VK_IMAGE_TILING_OPTIMAL); 1095b8e80941Smrg } 1096b8e80941Smrg 1097b8e80941Smrg uint32_t binding_table; 1098b8e80941Smrg VkResult result = 1099b8e80941Smrg binding_table_for_surface_state(cmd_buffer, 1100b8e80941Smrg cmd_buffer->state.null_surface_state, 1101b8e80941Smrg &binding_table); 1102b8e80941Smrg if (result != VK_SUCCESS) 1103b8e80941Smrg return; 1104b8e80941Smrg 1105b8e80941Smrg /* If multiview is enabled we ignore baseArrayLayer and layerCount */ 1106b8e80941Smrg if (subpass->view_mask) { 1107b8e80941Smrg uint32_t view_idx; 1108b8e80941Smrg for_each_bit(view_idx, 
subpass->view_mask) { 1109b8e80941Smrg for (uint32_t r = 0; r < rectCount; ++r) { 1110b8e80941Smrg const VkOffset2D offset = pRects[r].rect.offset; 1111b8e80941Smrg const VkExtent2D extent = pRects[r].rect.extent; 1112b8e80941Smrg VkClearDepthStencilValue value = attachment->clearValue.depthStencil; 1113b8e80941Smrg blorp_clear_attachments(batch, binding_table, 1114b8e80941Smrg depth_format, pass_att->samples, 1115b8e80941Smrg view_idx, 1, 1116b8e80941Smrg offset.x, offset.y, 1117b8e80941Smrg offset.x + extent.width, 1118b8e80941Smrg offset.y + extent.height, 1119b8e80941Smrg false, color_value, 1120b8e80941Smrg clear_depth, value.depth, 1121b8e80941Smrg clear_stencil ? 0xff : 0, value.stencil); 1122b8e80941Smrg } 1123b8e80941Smrg } 1124b8e80941Smrg return; 1125b8e80941Smrg } 1126b8e80941Smrg 1127b8e80941Smrg for (uint32_t r = 0; r < rectCount; ++r) { 1128b8e80941Smrg const VkOffset2D offset = pRects[r].rect.offset; 1129b8e80941Smrg const VkExtent2D extent = pRects[r].rect.extent; 1130b8e80941Smrg VkClearDepthStencilValue value = attachment->clearValue.depthStencil; 1131b8e80941Smrg assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS); 1132b8e80941Smrg blorp_clear_attachments(batch, binding_table, 1133b8e80941Smrg depth_format, pass_att->samples, 1134b8e80941Smrg pRects[r].baseArrayLayer, 1135b8e80941Smrg pRects[r].layerCount, 1136b8e80941Smrg offset.x, offset.y, 1137b8e80941Smrg offset.x + extent.width, offset.y + extent.height, 1138b8e80941Smrg false, color_value, 1139b8e80941Smrg clear_depth, value.depth, 1140b8e80941Smrg clear_stencil ? 
0xff : 0, value.stencil); 1141b8e80941Smrg } 1142b8e80941Smrg} 1143b8e80941Smrg 1144b8e80941Smrgvoid anv_CmdClearAttachments( 1145b8e80941Smrg VkCommandBuffer commandBuffer, 1146b8e80941Smrg uint32_t attachmentCount, 1147b8e80941Smrg const VkClearAttachment* pAttachments, 1148b8e80941Smrg uint32_t rectCount, 1149b8e80941Smrg const VkClearRect* pRects) 1150b8e80941Smrg{ 1151b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 1152b8e80941Smrg 1153b8e80941Smrg /* Because this gets called within a render pass, we tell blorp not to 1154b8e80941Smrg * trash our depth and stencil buffers. 1155b8e80941Smrg */ 1156b8e80941Smrg struct blorp_batch batch; 1157b8e80941Smrg enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; 1158b8e80941Smrg if (cmd_buffer->state.conditional_render_enabled) { 1159b8e80941Smrg anv_cmd_emit_conditional_render_predicate(cmd_buffer); 1160b8e80941Smrg flags |= BLORP_BATCH_PREDICATE_ENABLE; 1161b8e80941Smrg } 1162b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags); 1163b8e80941Smrg 1164b8e80941Smrg for (uint32_t a = 0; a < attachmentCount; ++a) { 1165b8e80941Smrg if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { 1166b8e80941Smrg assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); 1167b8e80941Smrg clear_color_attachment(cmd_buffer, &batch, 1168b8e80941Smrg &pAttachments[a], 1169b8e80941Smrg rectCount, pRects); 1170b8e80941Smrg } else { 1171b8e80941Smrg clear_depth_stencil_attachment(cmd_buffer, &batch, 1172b8e80941Smrg &pAttachments[a], 1173b8e80941Smrg rectCount, pRects); 1174b8e80941Smrg } 1175b8e80941Smrg } 1176b8e80941Smrg 1177b8e80941Smrg blorp_batch_finish(&batch); 1178b8e80941Smrg} 1179b8e80941Smrg 1180b8e80941Smrgenum subpass_stage { 1181b8e80941Smrg SUBPASS_STAGE_LOAD, 1182b8e80941Smrg SUBPASS_STAGE_DRAW, 1183b8e80941Smrg SUBPASS_STAGE_RESOLVE, 1184b8e80941Smrg}; 1185b8e80941Smrg 1186b8e80941Smrgvoid 
1187b8e80941Smrganv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, 1188b8e80941Smrg const struct anv_image *src_image, 1189b8e80941Smrg enum isl_aux_usage src_aux_usage, 1190b8e80941Smrg uint32_t src_level, uint32_t src_base_layer, 1191b8e80941Smrg const struct anv_image *dst_image, 1192b8e80941Smrg enum isl_aux_usage dst_aux_usage, 1193b8e80941Smrg uint32_t dst_level, uint32_t dst_base_layer, 1194b8e80941Smrg VkImageAspectFlagBits aspect, 1195b8e80941Smrg uint32_t src_x, uint32_t src_y, 1196b8e80941Smrg uint32_t dst_x, uint32_t dst_y, 1197b8e80941Smrg uint32_t width, uint32_t height, 1198b8e80941Smrg uint32_t layer_count, 1199b8e80941Smrg enum blorp_filter filter) 1200b8e80941Smrg{ 1201b8e80941Smrg struct blorp_batch batch; 1202b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 1203b8e80941Smrg 1204b8e80941Smrg assert(src_image->type == VK_IMAGE_TYPE_2D); 1205b8e80941Smrg assert(src_image->samples > 1); 1206b8e80941Smrg assert(dst_image->type == VK_IMAGE_TYPE_2D); 1207b8e80941Smrg assert(dst_image->samples == 1); 1208b8e80941Smrg assert(src_image->n_planes == dst_image->n_planes); 1209b8e80941Smrg assert(!src_image->format->can_ycbcr); 1210b8e80941Smrg assert(!dst_image->format->can_ycbcr); 1211b8e80941Smrg 1212b8e80941Smrg struct blorp_surf src_surf, dst_surf; 1213b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect, 1214b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1215b8e80941Smrg src_aux_usage, &src_surf); 1216b8e80941Smrg if (src_aux_usage == ISL_AUX_USAGE_MCS) { 1217b8e80941Smrg src_surf.clear_color_addr = anv_to_blorp_address( 1218b8e80941Smrg anv_image_get_clear_color_addr(cmd_buffer->device, src_image, 1219b8e80941Smrg VK_IMAGE_ASPECT_COLOR_BIT)); 1220b8e80941Smrg } 1221b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect, 1222b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1223b8e80941Smrg dst_aux_usage, &dst_surf); 1224b8e80941Smrg 
anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, 1225b8e80941Smrg aspect, dst_aux_usage, 1226b8e80941Smrg dst_level, dst_base_layer, layer_count); 1227b8e80941Smrg 1228b8e80941Smrg if (filter == BLORP_FILTER_NONE) { 1229b8e80941Smrg /* If no explicit filter is provided, then it's implied by the type of 1230b8e80941Smrg * the source image. 1231b8e80941Smrg */ 1232b8e80941Smrg if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) || 1233b8e80941Smrg (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) || 1234b8e80941Smrg isl_format_has_int_channel(src_surf.surf->format)) { 1235b8e80941Smrg filter = BLORP_FILTER_SAMPLE_0; 1236b8e80941Smrg } else { 1237b8e80941Smrg filter = BLORP_FILTER_AVERAGE; 1238b8e80941Smrg } 1239b8e80941Smrg } 1240b8e80941Smrg 1241b8e80941Smrg for (uint32_t l = 0; l < layer_count; l++) { 1242b8e80941Smrg blorp_blit(&batch, 1243b8e80941Smrg &src_surf, src_level, src_base_layer + l, 1244b8e80941Smrg ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, 1245b8e80941Smrg &dst_surf, dst_level, dst_base_layer + l, 1246b8e80941Smrg ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, 1247b8e80941Smrg src_x, src_y, src_x + width, src_y + height, 1248b8e80941Smrg dst_x, dst_y, dst_x + width, dst_y + height, 1249b8e80941Smrg filter, false, false); 1250b8e80941Smrg } 1251b8e80941Smrg 1252b8e80941Smrg blorp_batch_finish(&batch); 1253b8e80941Smrg} 1254b8e80941Smrg 1255b8e80941Smrgvoid anv_CmdResolveImage( 1256b8e80941Smrg VkCommandBuffer commandBuffer, 1257b8e80941Smrg VkImage srcImage, 1258b8e80941Smrg VkImageLayout srcImageLayout, 1259b8e80941Smrg VkImage dstImage, 1260b8e80941Smrg VkImageLayout dstImageLayout, 1261b8e80941Smrg uint32_t regionCount, 1262b8e80941Smrg const VkImageResolve* pRegions) 1263b8e80941Smrg{ 1264b8e80941Smrg ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 1265b8e80941Smrg ANV_FROM_HANDLE(anv_image, src_image, srcImage); 1266b8e80941Smrg ANV_FROM_HANDLE(anv_image, dst_image, dstImage); 1267b8e80941Smrg 1268b8e80941Smrg 
assert(!src_image->format->can_ycbcr); 1269b8e80941Smrg 1270b8e80941Smrg for (uint32_t r = 0; r < regionCount; r++) { 1271b8e80941Smrg assert(pRegions[r].srcSubresource.aspectMask == 1272b8e80941Smrg pRegions[r].dstSubresource.aspectMask); 1273b8e80941Smrg assert(anv_get_layerCount(src_image, &pRegions[r].srcSubresource) == 1274b8e80941Smrg anv_get_layerCount(dst_image, &pRegions[r].dstSubresource)); 1275b8e80941Smrg 1276b8e80941Smrg const uint32_t layer_count = 1277b8e80941Smrg anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); 1278b8e80941Smrg 1279b8e80941Smrg uint32_t aspect_bit; 1280b8e80941Smrg anv_foreach_image_aspect_bit(aspect_bit, src_image, 1281b8e80941Smrg pRegions[r].srcSubresource.aspectMask) { 1282b8e80941Smrg enum isl_aux_usage src_aux_usage = 1283b8e80941Smrg anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image, 1284b8e80941Smrg (1 << aspect_bit), srcImageLayout); 1285b8e80941Smrg enum isl_aux_usage dst_aux_usage = 1286b8e80941Smrg anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image, 1287b8e80941Smrg (1 << aspect_bit), dstImageLayout); 1288b8e80941Smrg 1289b8e80941Smrg anv_image_msaa_resolve(cmd_buffer, 1290b8e80941Smrg src_image, src_aux_usage, 1291b8e80941Smrg pRegions[r].srcSubresource.mipLevel, 1292b8e80941Smrg pRegions[r].srcSubresource.baseArrayLayer, 1293b8e80941Smrg dst_image, dst_aux_usage, 1294b8e80941Smrg pRegions[r].dstSubresource.mipLevel, 1295b8e80941Smrg pRegions[r].dstSubresource.baseArrayLayer, 1296b8e80941Smrg (1 << aspect_bit), 1297b8e80941Smrg pRegions[r].srcOffset.x, 1298b8e80941Smrg pRegions[r].srcOffset.y, 1299b8e80941Smrg pRegions[r].dstOffset.x, 1300b8e80941Smrg pRegions[r].dstOffset.y, 1301b8e80941Smrg pRegions[r].extent.width, 1302b8e80941Smrg pRegions[r].extent.height, 1303b8e80941Smrg layer_count, BLORP_FILTER_NONE); 1304b8e80941Smrg } 1305b8e80941Smrg } 1306b8e80941Smrg} 1307b8e80941Smrg 1308b8e80941Smrgstatic enum isl_aux_usage 1309b8e80941Smrgfast_clear_aux_usage(const struct anv_image 
*image, 1310b8e80941Smrg VkImageAspectFlagBits aspect) 1311b8e80941Smrg{ 1312b8e80941Smrg uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); 1313b8e80941Smrg if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) 1314b8e80941Smrg return ISL_AUX_USAGE_CCS_D; 1315b8e80941Smrg else 1316b8e80941Smrg return image->planes[plane].aux_usage; 1317b8e80941Smrg} 1318b8e80941Smrg 1319b8e80941Smrgvoid 1320b8e80941Smrganv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, 1321b8e80941Smrg const struct anv_image *image, 1322b8e80941Smrg uint32_t base_level, uint32_t level_count, 1323b8e80941Smrg uint32_t base_layer, uint32_t layer_count) 1324b8e80941Smrg{ 1325b8e80941Smrg struct blorp_batch batch; 1326b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 1327b8e80941Smrg 1328b8e80941Smrg assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT && image->n_planes == 1); 1329b8e80941Smrg 1330b8e80941Smrg struct blorp_surf surf; 1331b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 1332b8e80941Smrg image, VK_IMAGE_ASPECT_COLOR_BIT, 1333b8e80941Smrg VK_IMAGE_LAYOUT_GENERAL, 1334b8e80941Smrg ISL_AUX_USAGE_NONE, &surf); 1335b8e80941Smrg assert(surf.aux_usage == ISL_AUX_USAGE_NONE); 1336b8e80941Smrg 1337b8e80941Smrg struct blorp_surf shadow_surf = { 1338b8e80941Smrg .surf = &image->planes[0].shadow_surface.isl, 1339b8e80941Smrg .addr = { 1340b8e80941Smrg .buffer = image->planes[0].address.bo, 1341b8e80941Smrg .offset = image->planes[0].address.offset + 1342b8e80941Smrg image->planes[0].shadow_surface.offset, 1343b8e80941Smrg .mocs = anv_mocs_for_bo(cmd_buffer->device, 1344b8e80941Smrg image->planes[0].address.bo), 1345b8e80941Smrg }, 1346b8e80941Smrg }; 1347b8e80941Smrg 1348b8e80941Smrg for (uint32_t l = 0; l < level_count; l++) { 1349b8e80941Smrg const uint32_t level = base_level + l; 1350b8e80941Smrg 1351b8e80941Smrg const VkExtent3D extent = { 1352b8e80941Smrg .width = anv_minify(image->extent.width, level), 1353b8e80941Smrg 
.height = anv_minify(image->extent.height, level), 1354b8e80941Smrg .depth = anv_minify(image->extent.depth, level), 1355b8e80941Smrg }; 1356b8e80941Smrg 1357b8e80941Smrg if (image->type == VK_IMAGE_TYPE_3D) 1358b8e80941Smrg layer_count = extent.depth; 1359b8e80941Smrg 1360b8e80941Smrg for (uint32_t a = 0; a < layer_count; a++) { 1361b8e80941Smrg const uint32_t layer = base_layer + a; 1362b8e80941Smrg 1363b8e80941Smrg blorp_copy(&batch, &surf, level, layer, 1364b8e80941Smrg &shadow_surf, level, layer, 1365b8e80941Smrg 0, 0, 0, 0, extent.width, extent.height); 1366b8e80941Smrg } 1367b8e80941Smrg } 1368b8e80941Smrg 1369b8e80941Smrg blorp_batch_finish(&batch); 1370b8e80941Smrg} 1371b8e80941Smrg 1372b8e80941Smrgvoid 1373b8e80941Smrganv_image_clear_color(struct anv_cmd_buffer *cmd_buffer, 1374b8e80941Smrg const struct anv_image *image, 1375b8e80941Smrg VkImageAspectFlagBits aspect, 1376b8e80941Smrg enum isl_aux_usage aux_usage, 1377b8e80941Smrg enum isl_format format, struct isl_swizzle swizzle, 1378b8e80941Smrg uint32_t level, uint32_t base_layer, uint32_t layer_count, 1379b8e80941Smrg VkRect2D area, union isl_color_value clear_color) 1380b8e80941Smrg{ 1381b8e80941Smrg assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); 1382b8e80941Smrg 1383b8e80941Smrg /* We don't support planar images with multisampling yet */ 1384b8e80941Smrg assert(image->n_planes == 1); 1385b8e80941Smrg 1386b8e80941Smrg struct blorp_batch batch; 1387b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 1388b8e80941Smrg 1389b8e80941Smrg struct blorp_surf surf; 1390b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, 1391b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1392b8e80941Smrg aux_usage, &surf); 1393b8e80941Smrg anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage, 1394b8e80941Smrg level, base_layer, layer_count); 1395b8e80941Smrg 1396b8e80941Smrg blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle), 
1397b8e80941Smrg level, base_layer, layer_count, 1398b8e80941Smrg area.offset.x, area.offset.y, 1399b8e80941Smrg area.offset.x + area.extent.width, 1400b8e80941Smrg area.offset.y + area.extent.height, 1401b8e80941Smrg clear_color, NULL); 1402b8e80941Smrg 1403b8e80941Smrg blorp_batch_finish(&batch); 1404b8e80941Smrg} 1405b8e80941Smrg 1406b8e80941Smrgvoid 1407b8e80941Smrganv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, 1408b8e80941Smrg const struct anv_image *image, 1409b8e80941Smrg VkImageAspectFlags aspects, 1410b8e80941Smrg enum isl_aux_usage depth_aux_usage, 1411b8e80941Smrg uint32_t level, 1412b8e80941Smrg uint32_t base_layer, uint32_t layer_count, 1413b8e80941Smrg VkRect2D area, 1414b8e80941Smrg float depth_value, uint8_t stencil_value) 1415b8e80941Smrg{ 1416b8e80941Smrg assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | 1417b8e80941Smrg VK_IMAGE_ASPECT_STENCIL_BIT)); 1418b8e80941Smrg 1419b8e80941Smrg struct blorp_batch batch; 1420b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 1421b8e80941Smrg 1422b8e80941Smrg struct blorp_surf depth = {}; 1423b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { 1424b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 1425b8e80941Smrg image, VK_IMAGE_ASPECT_DEPTH_BIT, 1426b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1427b8e80941Smrg depth_aux_usage, &depth); 1428b8e80941Smrg depth.clear_color.f32[0] = ANV_HZ_FC_VAL; 1429b8e80941Smrg } 1430b8e80941Smrg 1431b8e80941Smrg struct blorp_surf stencil = {}; 1432b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1433b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 1434b8e80941Smrg image, VK_IMAGE_ASPECT_STENCIL_BIT, 1435b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1436b8e80941Smrg ISL_AUX_USAGE_NONE, &stencil); 1437b8e80941Smrg } 1438b8e80941Smrg 1439b8e80941Smrg blorp_clear_depth_stencil(&batch, &depth, &stencil, 1440b8e80941Smrg level, base_layer, layer_count, 1441b8e80941Smrg area.offset.x, 
area.offset.y, 1442b8e80941Smrg area.offset.x + area.extent.width, 1443b8e80941Smrg area.offset.y + area.extent.height, 1444b8e80941Smrg aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 1445b8e80941Smrg depth_value, 1446b8e80941Smrg (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0, 1447b8e80941Smrg stencil_value); 1448b8e80941Smrg 1449b8e80941Smrg blorp_batch_finish(&batch); 1450b8e80941Smrg} 1451b8e80941Smrg 1452b8e80941Smrgvoid 1453b8e80941Smrganv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, 1454b8e80941Smrg const struct anv_image *image, 1455b8e80941Smrg VkImageAspectFlagBits aspect, uint32_t level, 1456b8e80941Smrg uint32_t base_layer, uint32_t layer_count, 1457b8e80941Smrg enum isl_aux_op hiz_op) 1458b8e80941Smrg{ 1459b8e80941Smrg assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT); 1460b8e80941Smrg assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level)); 1461b8e80941Smrg assert(anv_image_aspect_to_plane(image->aspects, 1462b8e80941Smrg VK_IMAGE_ASPECT_DEPTH_BIT) == 0); 1463b8e80941Smrg 1464b8e80941Smrg struct blorp_batch batch; 1465b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 1466b8e80941Smrg 1467b8e80941Smrg struct blorp_surf surf; 1468b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 1469b8e80941Smrg image, VK_IMAGE_ASPECT_DEPTH_BIT, 1470b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1471b8e80941Smrg ISL_AUX_USAGE_HIZ, &surf); 1472b8e80941Smrg surf.clear_color.f32[0] = ANV_HZ_FC_VAL; 1473b8e80941Smrg 1474b8e80941Smrg blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op); 1475b8e80941Smrg 1476b8e80941Smrg blorp_batch_finish(&batch); 1477b8e80941Smrg} 1478b8e80941Smrg 1479b8e80941Smrgvoid 1480b8e80941Smrganv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, 1481b8e80941Smrg const struct anv_image *image, 1482b8e80941Smrg VkImageAspectFlags aspects, 1483b8e80941Smrg uint32_t level, 1484b8e80941Smrg uint32_t base_layer, uint32_t layer_count, 1485b8e80941Smrg VkRect2D area, uint8_t 
stencil_value) 1486b8e80941Smrg{ 1487b8e80941Smrg assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | 1488b8e80941Smrg VK_IMAGE_ASPECT_STENCIL_BIT)); 1489b8e80941Smrg 1490b8e80941Smrg struct blorp_batch batch; 1491b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); 1492b8e80941Smrg 1493b8e80941Smrg struct blorp_surf depth = {}; 1494b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { 1495b8e80941Smrg assert(base_layer + layer_count <= 1496b8e80941Smrg anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level)); 1497b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 1498b8e80941Smrg image, VK_IMAGE_ASPECT_DEPTH_BIT, 1499b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1500b8e80941Smrg ISL_AUX_USAGE_HIZ, &depth); 1501b8e80941Smrg depth.clear_color.f32[0] = ANV_HZ_FC_VAL; 1502b8e80941Smrg } 1503b8e80941Smrg 1504b8e80941Smrg struct blorp_surf stencil = {}; 1505b8e80941Smrg if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 1506b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, 1507b8e80941Smrg image, VK_IMAGE_ASPECT_STENCIL_BIT, 1508b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1509b8e80941Smrg ISL_AUX_USAGE_NONE, &stencil); 1510b8e80941Smrg } 1511b8e80941Smrg 1512b8e80941Smrg /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear": 1513b8e80941Smrg * 1514b8e80941Smrg * "The following is required when performing a depth buffer clear with 1515b8e80941Smrg * using the WM_STATE or 3DSTATE_WM: 1516b8e80941Smrg * 1517b8e80941Smrg * * If other rendering operations have preceded this clear, a 1518b8e80941Smrg * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit 1519b8e80941Smrg * enabled must be issued before the rectangle primitive used for 1520b8e80941Smrg * the depth buffer clear operation. 
1521b8e80941Smrg * * [...]" 1522b8e80941Smrg * 1523b8e80941Smrg * Even though the PRM only says that this is required if using 3DSTATE_WM 1524b8e80941Smrg * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional 1525b8e80941Smrg * hangs when doing a clear with WM_HZ_OP. 1526b8e80941Smrg */ 1527b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= 1528b8e80941Smrg ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; 1529b8e80941Smrg 1530b8e80941Smrg blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil, 1531b8e80941Smrg level, base_layer, layer_count, 1532b8e80941Smrg area.offset.x, area.offset.y, 1533b8e80941Smrg area.offset.x + area.extent.width, 1534b8e80941Smrg area.offset.y + area.extent.height, 1535b8e80941Smrg aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 1536b8e80941Smrg ANV_HZ_FC_VAL, 1537b8e80941Smrg aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 1538b8e80941Smrg stencil_value); 1539b8e80941Smrg 1540b8e80941Smrg blorp_batch_finish(&batch); 1541b8e80941Smrg 1542b8e80941Smrg /* From the SKL PRM, Depth Buffer Clear: 1543b8e80941Smrg * 1544b8e80941Smrg * "Depth Buffer Clear Workaround 1545b8e80941Smrg * 1546b8e80941Smrg * Depth buffer clear pass using any of the methods (WM_STATE, 1547b8e80941Smrg * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL 1548b8e80941Smrg * command with DEPTH_STALL bit and Depth FLUSH bits “set” before 1549b8e80941Smrg * starting to render. DepthStall and DepthFlush are not needed between 1550b8e80941Smrg * consecutive depth clear passes nor is it required if the depth-clear 1551b8e80941Smrg * pass was done with “full_surf_clear” bit set in the 1552b8e80941Smrg * 3DSTATE_WM_HZ_OP." 1553b8e80941Smrg * 1554b8e80941Smrg * Even though the PRM provides a bunch of conditions under which this is 1555b8e80941Smrg * supposedly unnecessary, we choose to perform the flush unconditionally 1556b8e80941Smrg * just to be safe. 
1557b8e80941Smrg */ 1558b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= 1559b8e80941Smrg ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; 1560b8e80941Smrg} 1561b8e80941Smrg 1562b8e80941Smrgvoid 1563b8e80941Smrganv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, 1564b8e80941Smrg const struct anv_image *image, 1565b8e80941Smrg enum isl_format format, 1566b8e80941Smrg VkImageAspectFlagBits aspect, 1567b8e80941Smrg uint32_t base_layer, uint32_t layer_count, 1568b8e80941Smrg enum isl_aux_op mcs_op, union isl_color_value *clear_value, 1569b8e80941Smrg bool predicate) 1570b8e80941Smrg{ 1571b8e80941Smrg assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); 1572b8e80941Smrg assert(image->samples > 1); 1573b8e80941Smrg assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0)); 1574b8e80941Smrg 1575b8e80941Smrg /* Multisampling with multi-planar formats is not supported */ 1576b8e80941Smrg assert(image->n_planes == 1); 1577b8e80941Smrg 1578b8e80941Smrg struct blorp_batch batch; 1579b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 1580b8e80941Smrg predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0); 1581b8e80941Smrg 1582b8e80941Smrg struct blorp_surf surf; 1583b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, 1584b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1585b8e80941Smrg ISL_AUX_USAGE_MCS, &surf); 1586b8e80941Smrg 1587b8e80941Smrg /* Blorp will store the clear color for us if we provide the clear color 1588b8e80941Smrg * address and we are doing a fast clear. So we save the clear value into 1589b8e80941Smrg * the blorp surface. However, in some situations we want to do a fast clear 1590b8e80941Smrg * without changing the clear value stored in the state buffer. For those 1591b8e80941Smrg * cases, we set the clear color address pointer to NULL, so blorp will not 1592b8e80941Smrg * try to store a garbage color. 
1593b8e80941Smrg */ 1594b8e80941Smrg if (mcs_op == ISL_AUX_OP_FAST_CLEAR) { 1595b8e80941Smrg if (clear_value) 1596b8e80941Smrg surf.clear_color = *clear_value; 1597b8e80941Smrg else 1598b8e80941Smrg surf.clear_color_addr.buffer = NULL; 1599b8e80941Smrg } 1600b8e80941Smrg 1601b8e80941Smrg /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": 1602b8e80941Smrg * 1603b8e80941Smrg * "After Render target fast clear, pipe-control with color cache 1604b8e80941Smrg * write-flush must be issued before sending any DRAW commands on 1605b8e80941Smrg * that render target." 1606b8e80941Smrg * 1607b8e80941Smrg * This comment is a bit cryptic and doesn't really tell you what's going 1608b8e80941Smrg * or what's really needed. It appears that fast clear ops are not 1609b8e80941Smrg * properly synchronized with other drawing. This means that we cannot 1610b8e80941Smrg * have a fast clear operation in the pipe at the same time as other 1611b8e80941Smrg * regular drawing operations. We need to use a PIPE_CONTROL to ensure 1612b8e80941Smrg * that the contents of the previous draw hit the render target before we 1613b8e80941Smrg * resolve and then use a second PIPE_CONTROL after the resolve to ensure 1614b8e80941Smrg * that it is completed before any additional drawing occurs. 
1615b8e80941Smrg */ 1616b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= 1617b8e80941Smrg ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; 1618b8e80941Smrg 1619b8e80941Smrg switch (mcs_op) { 1620b8e80941Smrg case ISL_AUX_OP_FAST_CLEAR: 1621b8e80941Smrg blorp_fast_clear(&batch, &surf, format, 1622b8e80941Smrg 0, base_layer, layer_count, 1623b8e80941Smrg 0, 0, image->extent.width, image->extent.height); 1624b8e80941Smrg break; 1625b8e80941Smrg case ISL_AUX_OP_PARTIAL_RESOLVE: 1626b8e80941Smrg blorp_mcs_partial_resolve(&batch, &surf, format, 1627b8e80941Smrg base_layer, layer_count); 1628b8e80941Smrg break; 1629b8e80941Smrg case ISL_AUX_OP_FULL_RESOLVE: 1630b8e80941Smrg case ISL_AUX_OP_AMBIGUATE: 1631b8e80941Smrg default: 1632b8e80941Smrg unreachable("Unsupported MCS operation"); 1633b8e80941Smrg } 1634b8e80941Smrg 1635b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= 1636b8e80941Smrg ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; 1637b8e80941Smrg 1638b8e80941Smrg blorp_batch_finish(&batch); 1639b8e80941Smrg} 1640b8e80941Smrg 1641b8e80941Smrgvoid 1642b8e80941Smrganv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, 1643b8e80941Smrg const struct anv_image *image, 1644b8e80941Smrg enum isl_format format, 1645b8e80941Smrg VkImageAspectFlagBits aspect, uint32_t level, 1646b8e80941Smrg uint32_t base_layer, uint32_t layer_count, 1647b8e80941Smrg enum isl_aux_op ccs_op, union isl_color_value *clear_value, 1648b8e80941Smrg bool predicate) 1649b8e80941Smrg{ 1650b8e80941Smrg assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); 1651b8e80941Smrg assert(image->samples == 1); 1652b8e80941Smrg assert(level < anv_image_aux_levels(image, aspect)); 1653b8e80941Smrg /* Multi-LOD YcBcR is not allowed */ 1654b8e80941Smrg assert(image->n_planes == 1 || level == 0); 1655b8e80941Smrg assert(base_layer + layer_count <= 1656b8e80941Smrg anv_image_aux_layers(image, aspect, level)); 1657b8e80941Smrg 1658b8e80941Smrg uint32_t plane = 
anv_image_aspect_to_plane(image->aspects, aspect); 1659b8e80941Smrg uint32_t width_div = image->format->planes[plane].denominator_scales[0]; 1660b8e80941Smrg uint32_t height_div = image->format->planes[plane].denominator_scales[1]; 1661b8e80941Smrg uint32_t level_width = anv_minify(image->extent.width, level) / width_div; 1662b8e80941Smrg uint32_t level_height = anv_minify(image->extent.height, level) / height_div; 1663b8e80941Smrg 1664b8e80941Smrg struct blorp_batch batch; 1665b8e80941Smrg blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 1666b8e80941Smrg predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0); 1667b8e80941Smrg 1668b8e80941Smrg struct blorp_surf surf; 1669b8e80941Smrg get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, 1670b8e80941Smrg ANV_IMAGE_LAYOUT_EXPLICIT_AUX, 1671b8e80941Smrg fast_clear_aux_usage(image, aspect), 1672b8e80941Smrg &surf); 1673b8e80941Smrg 1674b8e80941Smrg /* Blorp will store the clear color for us if we provide the clear color 1675b8e80941Smrg * address and we are doing a fast clear. So we save the clear value into 1676b8e80941Smrg * the blorp surface. However, in some situations we want to do a fast clear 1677b8e80941Smrg * without changing the clear value stored in the state buffer. For those 1678b8e80941Smrg * cases, we set the clear color address pointer to NULL, so blorp will not 1679b8e80941Smrg * try to store a garbage color. 1680b8e80941Smrg */ 1681b8e80941Smrg if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { 1682b8e80941Smrg if (clear_value) 1683b8e80941Smrg surf.clear_color = *clear_value; 1684b8e80941Smrg else 1685b8e80941Smrg surf.clear_color_addr.buffer = NULL; 1686b8e80941Smrg } 1687b8e80941Smrg 1688b8e80941Smrg /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": 1689b8e80941Smrg * 1690b8e80941Smrg * "After Render target fast clear, pipe-control with color cache 1691b8e80941Smrg * write-flush must be issued before sending any DRAW commands on 1692b8e80941Smrg * that render target." 
1693b8e80941Smrg * 1694b8e80941Smrg * This comment is a bit cryptic and doesn't really tell you what's going 1695b8e80941Smrg * or what's really needed. It appears that fast clear ops are not 1696b8e80941Smrg * properly synchronized with other drawing. This means that we cannot 1697b8e80941Smrg * have a fast clear operation in the pipe at the same time as other 1698b8e80941Smrg * regular drawing operations. We need to use a PIPE_CONTROL to ensure 1699b8e80941Smrg * that the contents of the previous draw hit the render target before we 1700b8e80941Smrg * resolve and then use a second PIPE_CONTROL after the resolve to ensure 1701b8e80941Smrg * that it is completed before any additional drawing occurs. 1702b8e80941Smrg */ 1703b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= 1704b8e80941Smrg ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; 1705b8e80941Smrg 1706b8e80941Smrg switch (ccs_op) { 1707b8e80941Smrg case ISL_AUX_OP_FAST_CLEAR: 1708b8e80941Smrg blorp_fast_clear(&batch, &surf, format, 1709b8e80941Smrg level, base_layer, layer_count, 1710b8e80941Smrg 0, 0, level_width, level_height); 1711b8e80941Smrg break; 1712b8e80941Smrg case ISL_AUX_OP_FULL_RESOLVE: 1713b8e80941Smrg case ISL_AUX_OP_PARTIAL_RESOLVE: 1714b8e80941Smrg blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count, 1715b8e80941Smrg format, ccs_op); 1716b8e80941Smrg break; 1717b8e80941Smrg case ISL_AUX_OP_AMBIGUATE: 1718b8e80941Smrg for (uint32_t a = 0; a < layer_count; a++) { 1719b8e80941Smrg const uint32_t layer = base_layer + a; 1720b8e80941Smrg blorp_ccs_ambiguate(&batch, &surf, level, layer); 1721b8e80941Smrg } 1722b8e80941Smrg break; 1723b8e80941Smrg default: 1724b8e80941Smrg unreachable("Unsupported CCS operation"); 1725b8e80941Smrg } 1726b8e80941Smrg 1727b8e80941Smrg cmd_buffer->state.pending_pipe_bits |= 1728b8e80941Smrg ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; 1729b8e80941Smrg 1730b8e80941Smrg blorp_batch_finish(&batch); 1731b8e80941Smrg} 1732