1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "tu_private.h" 25 26#include "a6xx.xml.h" 27#include "adreno_common.xml.h" 28#include "adreno_pm4.xml.h" 29 30#include "vk_format.h" 31 32#include "tu_cs.h" 33 34/* 35 * TODO: 36 * - image -> image copies 37 * - 3D textures 38 * - compressed image formats (need to divide offset/extent) 39 */ 40 41static uint32_t 42blit_control(enum a6xx_color_fmt fmt) 43{ 44 unsigned blit_cntl = 0xf00000; 45 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt); 46 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt)); 47 return blit_cntl; 48} 49 50static uint32_t tu6_sp_2d_src_format(VkFormat format) 51{ 52 const struct vk_format_description *desc = vk_format_description(format); 53 uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb); 54 55 int channel = vk_format_get_first_non_void_channel(format); 56 if (channel < 0) { 57 /* TODO special format. */ 58 return reg; 59 } 60 if (desc->channel[channel].normalized) { 61 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) 62 reg |= A6XX_SP_2D_SRC_FORMAT_SINT; 63 reg |= A6XX_SP_2D_SRC_FORMAT_NORM; 64 } else if (desc->channel[channel].pure_integer) { 65 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) 66 reg |= A6XX_SP_2D_SRC_FORMAT_SINT; 67 else 68 reg |= A6XX_SP_2D_SRC_FORMAT_UINT; 69 } 70 return reg; 71} 72 73static void 74tu_dma_prepare(struct tu_cmd_buffer *cmdbuf) 75{ 76 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10); 77 78 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); 79 tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR); 80 81 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); 82 tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH); 83 84 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1); 85 tu_cs_emit(&cmdbuf->cs, 0x0); 86 87 tu_cs_emit_wfi(&cmdbuf->cs); 88 89 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1); 90 tu_cs_emit(&cmdbuf->cs, 0x10000000); 91} 92 93static void 94tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, 95 struct tu_bo *src_bo, 96 uint64_t src_offset, 97 struct tu_bo *dst_bo, 98 uint64_t dst_offset, 99 uint64_t size) 100{ 101 const unsigned max_size_per_iter = 0x4000 - 0x40; 102 const unsigned max_iterations = 103 (size + max_size_per_iter) / max_size_per_iter; 104 105 tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ); 106 tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE); 107 108 tu_dma_prepare(cmdbuf); 109 110 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations); 111 112 /* buffer copy setup */ 113 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); 114 tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); 115 116 const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000; 117 118 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); 119 tu_cs_emit(&cmdbuf->cs, blit_cntl); 120 121 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); 122 tu_cs_emit(&cmdbuf->cs, blit_cntl); 123 124 for (; size;) { 125 uint64_t src_va = src_bo->iova + src_offset; 126 uint64_t dst_va = dst_bo->iova + dst_offset; 127 128 unsigned src_shift = src_va & 0x3f; 129 unsigned dst_shift = dst_va & 0x3f; 130 unsigned max_shift = MAX2(src_shift, dst_shift); 131 132 src_va -= src_shift; 133 dst_va -= dst_shift; 134 135 uint32_t size_todo = MIN2(0x4000 - max_shift, size); 136 unsigned pitch = (size_todo + max_shift + 63) & ~63; 137 138 /* 139 * Emit source: 140 */ 141 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); 142 tu_cs_emit(&cmdbuf->cs, 143 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) | 144 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | 145 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); 146 tu_cs_emit(&cmdbuf->cs, 147 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) | 148 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */ 149 tu_cs_emit_qw(&cmdbuf->cs, src_va); 150 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch)); 151 152 tu_cs_emit(&cmdbuf->cs, 0x00000000); 153 tu_cs_emit(&cmdbuf->cs, 0x00000000); 154 tu_cs_emit(&cmdbuf->cs, 0x00000000); 155 tu_cs_emit(&cmdbuf->cs, 0x00000000); 156 tu_cs_emit(&cmdbuf->cs, 0x00000000); 157 158 tu_cs_emit(&cmdbuf->cs, 0x00000000); 159 tu_cs_emit(&cmdbuf->cs, 0x00000000); 160 tu_cs_emit(&cmdbuf->cs, 0x00000000); 161 162 /* 163 * Emit destination: 164 */ 165 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); 166 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) | 167 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | 168 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); 169 tu_cs_emit_qw(&cmdbuf->cs, dst_va); 170 171 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch)); 172 tu_cs_emit(&cmdbuf->cs, 0x00000000); 173 tu_cs_emit(&cmdbuf->cs, 0x00000000); 174 tu_cs_emit(&cmdbuf->cs, 0x00000000); 175 tu_cs_emit(&cmdbuf->cs, 0x00000000); 176 tu_cs_emit(&cmdbuf->cs, 0x00000000); 177 178 /* 179 * Blit command: 180 */ 181 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); 182 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift)); 183 tu_cs_emit(&cmdbuf->cs, 184 A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1)); 185 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); 186 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); 187 188 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); 189 tu_cs_emit(&cmdbuf->cs, 190 A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0)); 191 tu_cs_emit(&cmdbuf->cs, 192 A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) | 193 A6XX_GRAS_2D_DST_BR_Y(0)); 194 195 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); 196 tu_cs_emit(&cmdbuf->cs, 0x3f); 197 tu_cs_emit_wfi(&cmdbuf->cs); 198 199 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); 200 tu_cs_emit(&cmdbuf->cs, 0); 201 202 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); 203 tu_cs_emit(&cmdbuf->cs, 0xf180); 204 205 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); 206 tu_cs_emit(&cmdbuf->cs, 0x01000000); 207 208 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); 209 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); 210 211 tu_cs_emit_wfi(&cmdbuf->cs); 212 213 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); 214 tu_cs_emit(&cmdbuf->cs, 0); 215 216 src_offset += size_todo; 217 dst_offset += size_todo; 218 size -= size_todo; 219 } 220 221 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); 222 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); 223 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); 224} 225 226static void 227tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf, 228 struct tu_buffer *src_buffer, 229 struct tu_image *dst_image, 230 const VkBufferImageCopy *copy_info, 231 VkFormat format, 232 uint32_t layer, 233 uint64_t src_va) 234{ 235 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; 236 237 uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset; 238 unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch * 239 vk_format_get_blocksize(format); 240 241 unsigned src_pitch; 242 unsigned src_offset = 0; 243 if (copy_info->imageExtent.height == 1) { 244 /* Can't find this in the spec, but not having it is sort of insane? */ 245 assert(src_va % vk_format_get_blocksize(format) == 0); 246 247 src_offset = (src_va & 63) / vk_format_get_blocksize(format); 248 src_va &= ~63; 249 250 src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64); 251 } else { 252 unsigned src_pixel_stride = copy_info->bufferRowLength 253 ? copy_info->bufferRowLength 254 : copy_info->imageExtent.width; 255 src_pitch = src_pixel_stride * vk_format_get_blocksize(format); 256 assert(!(src_pitch & 63)); 257 assert(!(src_va & 63)); 258 } 259 260 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); 261 262 /* 263 * Emit source: 264 */ 265 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); 266 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | 267 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | 268 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 269 0x500000); 270 tu_cs_emit(&cmdbuf->cs, 271 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) | 272 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( 273 copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */ 274 tu_cs_emit_qw(&cmdbuf->cs, src_va); 275 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); 276 277 tu_cs_emit(&cmdbuf->cs, 0x00000000); 278 tu_cs_emit(&cmdbuf->cs, 0x00000000); 279 tu_cs_emit(&cmdbuf->cs, 0x00000000); 280 tu_cs_emit(&cmdbuf->cs, 0x00000000); 281 tu_cs_emit(&cmdbuf->cs, 0x00000000); 282 283 tu_cs_emit(&cmdbuf->cs, 0x00000000); 284 tu_cs_emit(&cmdbuf->cs, 0x00000000); 285 tu_cs_emit(&cmdbuf->cs, 0x00000000); 286 287 /* 288 * Emit destination: 289 */ 290 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); 291 tu_cs_emit(&cmdbuf->cs, 292 A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | 293 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) | 294 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); 295 tu_cs_emit_qw(&cmdbuf->cs, dst_va); 296 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); 297 tu_cs_emit(&cmdbuf->cs, 0x00000000); 298 tu_cs_emit(&cmdbuf->cs, 0x00000000); 299 tu_cs_emit(&cmdbuf->cs, 0x00000000); 300 tu_cs_emit(&cmdbuf->cs, 0x00000000); 301 tu_cs_emit(&cmdbuf->cs, 0x00000000); 302 303 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); 304 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset)); 305 tu_cs_emit(&cmdbuf->cs, 306 A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1)); 307 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); 308 tu_cs_emit(&cmdbuf->cs, 309 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1)); 310 311 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); 312 tu_cs_emit(&cmdbuf->cs, 313 A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) | 314 A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y)); 315 tu_cs_emit(&cmdbuf->cs, 316 A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x + 317 copy_info->imageExtent.width - 1) | 318 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y + 319 copy_info->imageExtent.height - 1)); 320 321 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); 322 tu_cs_emit(&cmdbuf->cs, 0x3f); 323 tu_cs_emit_wfi(&cmdbuf->cs); 324 325 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); 326 tu_cs_emit(&cmdbuf->cs, 0); 327 328 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); 329 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); 330 331 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); 332 tu_cs_emit(&cmdbuf->cs, 0x01000000); 333 334 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); 335 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); 336 337 tu_cs_emit_wfi(&cmdbuf->cs); 338 339 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); 340 tu_cs_emit(&cmdbuf->cs, 0); 341} 342 343static void 344tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf, 345 struct tu_buffer *src_buffer, 346 struct tu_image *dst_image, 347 const VkBufferImageCopy *copy_info) 348{ 349 tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ); 350 tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); 351 352 /* general setup */ 353 tu_dma_prepare(cmdbuf); 354 355 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); 356 357 /* buffer copy setup */ 358 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); 359 tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); 360 361 VkFormat format = dst_image->vk_format; 362 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; 363 364 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000; 365 366 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); 367 tu_cs_emit(&cmdbuf->cs, blit_cntl); 368 369 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); 370 tu_cs_emit(&cmdbuf->cs, blit_cntl); 371 372 unsigned src_pixel_stride = copy_info->bufferRowLength 373 ? copy_info->bufferRowLength 374 : copy_info->imageExtent.width; 375 unsigned cpp = vk_format_get_blocksize(format); 376 unsigned src_pitch = src_pixel_stride * cpp; 377 378 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) { 379 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset; 380 uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch; 381 382 if ((src_pitch & 63) || (src_va & 63)) { 383 /* Do a per line copy */ 384 VkBufferImageCopy line_copy_info = *copy_info; 385 line_copy_info.imageExtent.height = 1; 386 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) { 387 /* 388 * if src_va is not aligned the line copy will need to adjust. Give it 389 * room to do so. 390 */ 391 unsigned max_width = 16384 - (src_va & 0x3f) ? 64 : 0; 392 line_copy_info.imageOffset.x = copy_info->imageOffset.x; 393 line_copy_info.imageExtent.width = copy_info->imageExtent.width; 394 395 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) { 396 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp); 397 398 line_copy_info.imageOffset.x += max_width; 399 line_copy_info.imageExtent.width -= max_width; 400 } 401 402 line_copy_info.imageOffset.y++; 403 src_va += src_pitch; 404 } 405 } else { 406 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va); 407 } 408 } 409 410 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15); 411 412 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); 413 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); 414 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); 415} 416 417static void 418tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf, 419 struct tu_image *src_image, 420 struct tu_buffer *dst_buffer, 421 const VkBufferImageCopy *copy_info, 422 VkFormat format, 423 uint32_t layer, 424 uint64_t dst_va) 425{ 426 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; 427 428 uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset; 429 unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch * 430 vk_format_get_blocksize(format); 431 432 unsigned dst_pitch; 433 unsigned dst_offset = 0; 434 if (copy_info->imageExtent.height == 1) { 435 /* Can't find this in the spec, but not having it is sort of insane? */ 436 assert(dst_va % vk_format_get_blocksize(format) == 0); 437 438 dst_offset = (dst_va & 63) / vk_format_get_blocksize(format); 439 dst_va &= ~63; 440 441 dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64); 442 } else { 443 unsigned dst_pixel_stride = copy_info->bufferRowLength 444 ? copy_info->bufferRowLength 445 : copy_info->imageExtent.width; 446 dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format); 447 assert(!(dst_pitch & 63)); 448 assert(!(dst_va & 63)); 449 } 450 451 452 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); 453 454 /* 455 * Emit source: 456 */ 457 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); 458 tu_cs_emit(&cmdbuf->cs, 459 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | 460 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) | 461 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); 462 tu_cs_emit(&cmdbuf->cs, 463 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) | 464 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( 465 src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */ 466 tu_cs_emit_qw(&cmdbuf->cs, src_va); 467 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); 468 469 tu_cs_emit(&cmdbuf->cs, 0x00000000); 470 tu_cs_emit(&cmdbuf->cs, 0x00000000); 471 tu_cs_emit(&cmdbuf->cs, 0x00000000); 472 tu_cs_emit(&cmdbuf->cs, 0x00000000); 473 tu_cs_emit(&cmdbuf->cs, 0x00000000); 474 475 tu_cs_emit(&cmdbuf->cs, 0x00000000); 476 tu_cs_emit(&cmdbuf->cs, 0x00000000); 477 tu_cs_emit(&cmdbuf->cs, 0x00000000); 478 479 /* 480 * Emit destination: 481 */ 482 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); 483 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | 484 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | 485 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); 486 tu_cs_emit_qw(&cmdbuf->cs, dst_va); 487 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); 488 tu_cs_emit(&cmdbuf->cs, 0x00000000); 489 tu_cs_emit(&cmdbuf->cs, 0x00000000); 490 tu_cs_emit(&cmdbuf->cs, 0x00000000); 491 tu_cs_emit(&cmdbuf->cs, 0x00000000); 492 tu_cs_emit(&cmdbuf->cs, 0x00000000); 493 494 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); 495 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x)); 496 tu_cs_emit(&cmdbuf->cs, 497 A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x + 498 copy_info->imageExtent.width - 1)); 499 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y)); 500 tu_cs_emit(&cmdbuf->cs, 501 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y + 502 copy_info->imageExtent.height - 1)); 503 504 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); 505 tu_cs_emit(&cmdbuf->cs, 506 A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0)); 507 tu_cs_emit(&cmdbuf->cs, 508 A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) | 509 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1)); 510 511 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); 512 tu_cs_emit(&cmdbuf->cs, 0x3f); 513 tu_cs_emit_wfi(&cmdbuf->cs); 514 515 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); 516 tu_cs_emit(&cmdbuf->cs, 0); 517 518 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); 519 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); 520 521 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); 522 tu_cs_emit(&cmdbuf->cs, 0x01000000); 523 524 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); 525 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); 526 527 tu_cs_emit_wfi(&cmdbuf->cs); 528 529 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); 530 tu_cs_emit(&cmdbuf->cs, 0); 531} 532 533static void 534tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf, 535 struct tu_image *src_image, 536 struct tu_buffer *dst_buffer, 537 const VkBufferImageCopy *copy_info) 538{ 539 tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); 540 tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); 541 542 /* general setup */ 543 tu_dma_prepare(cmdbuf); 544 545 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); 546 547 /* buffer copy setup */ 548 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); 549 tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); 550 551 VkFormat format = src_image->vk_format; 552 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; 553 554 unsigned dst_pixel_stride = copy_info->bufferRowLength 555 ? copy_info->bufferRowLength 556 : copy_info->imageExtent.width; 557 unsigned cpp = vk_format_get_blocksize(format); 558 unsigned dst_pitch = dst_pixel_stride * cpp; 559 560 561 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000; 562 563 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); 564 tu_cs_emit(&cmdbuf->cs, blit_cntl); 565 566 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); 567 tu_cs_emit(&cmdbuf->cs, blit_cntl); 568 569 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) { 570 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset; 571 uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch; 572 573 if ((dst_pitch & 63) || (dst_va & 63)) { 574 /* Do a per line copy */ 575 VkBufferImageCopy line_copy_info = *copy_info; 576 line_copy_info.imageExtent.height = 1; 577 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) { 578 /* 579 * if dst_va is not aligned the line copy will need to adjust. Give it 580 * room to do so. 581 */ 582 unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0; 583 line_copy_info.imageOffset.x = copy_info->imageOffset.x; 584 line_copy_info.imageExtent.width = copy_info->imageExtent.width; 585 586 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) { 587 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp); 588 589 line_copy_info.imageOffset.x += max_width; 590 line_copy_info.imageExtent.width -= max_width; 591 } 592 593 line_copy_info.imageOffset.y++; 594 dst_va += dst_pitch; 595 } 596 } else { 597 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va); 598 } 599 } 600 601 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15); 602 603 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); 604 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); 605 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); 606} 607 608void 609tu_CmdCopyBuffer(VkCommandBuffer commandBuffer, 610 VkBuffer srcBuffer, 611 VkBuffer destBuffer, 612 uint32_t regionCount, 613 const VkBufferCopy *pRegions) 614{ 615 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer); 616 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); 617 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer); 618 619 for (unsigned i = 0; i < regionCount; ++i) { 620 uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset; 621 uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset; 622 623 tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo, 624 dst_offset, pRegions[i].size); 625 } 626} 627 628void 629tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, 630 VkBuffer srcBuffer, 631 VkImage destImage, 632 VkImageLayout destImageLayout, 633 uint32_t regionCount, 634 const VkBufferImageCopy *pRegions) 635{ 636 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); 637 TU_FROM_HANDLE(tu_image, dest_image, destImage); 638 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); 639 640 for (unsigned i = 0; i < regionCount; ++i) { 641 tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, 642 pRegions + i); 643 } 644} 645 646void 647tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, 648 VkImage srcImage, 649 VkImageLayout srcImageLayout, 650 VkBuffer destBuffer, 651 uint32_t regionCount, 652 const VkBufferImageCopy *pRegions) 653{ 654 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); 655 TU_FROM_HANDLE(tu_image, src_image, srcImage); 656 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer); 657 658 for (unsigned i = 0; i < regionCount; ++i) { 659 tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer, 660 pRegions + i); 661 } 662} 663 664static void 665meta_copy_image(struct tu_cmd_buffer *cmd_buffer, 666 struct tu_image *src_image, 667 VkImageLayout src_image_layout, 668 struct tu_image *dest_image, 669 VkImageLayout dest_image_layout, 670 uint32_t regionCount, 671 const VkImageCopy *pRegions) 672{ 673} 674 675void 676tu_CmdCopyImage(VkCommandBuffer commandBuffer, 677 VkImage srcImage, 678 VkImageLayout srcImageLayout, 679 VkImage destImage, 680 VkImageLayout destImageLayout, 681 uint32_t regionCount, 682 const VkImageCopy *pRegions) 683{ 684 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); 685 TU_FROM_HANDLE(tu_image, src_image, srcImage); 686 TU_FROM_HANDLE(tu_image, dest_image, destImage); 687 688 meta_copy_image(cmd_buffer, src_image, srcImageLayout, dest_image, 689 destImageLayout, regionCount, pRegions); 690} 691