1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "tu_private.h"
25
26#include "a6xx.xml.h"
27#include "adreno_common.xml.h"
28#include "adreno_pm4.xml.h"
29
30#include "vk_format.h"
31
32#include "tu_cs.h"
33
34/*
35 * TODO:
36 *   - image -> image copies
37 *   - 3D textures
38 *   - compressed image formats (need to divide offset/extent)
39 */
40
41static uint32_t
42blit_control(enum a6xx_color_fmt fmt)
43{
44   unsigned blit_cntl = 0xf00000;
45   blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
46   blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt));
47   return blit_cntl;
48}
49
50static uint32_t tu6_sp_2d_src_format(VkFormat format)
51{
52   const struct vk_format_description *desc = vk_format_description(format);
53   uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb);
54
55   int channel = vk_format_get_first_non_void_channel(format);
56   if (channel < 0) {
57      /* TODO special format. */
58      return reg;
59   }
60   if (desc->channel[channel].normalized) {
61      if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
62         reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
63      reg |= A6XX_SP_2D_SRC_FORMAT_NORM;
64   } else if (desc->channel[channel].pure_integer) {
65      if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
66         reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
67      else
68         reg |= A6XX_SP_2D_SRC_FORMAT_UINT;
69   }
70   return reg;
71}
72
73static void
74tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
75{
76   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10);
77
78   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
79   tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR);
80
81   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
82   tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH);
83
84   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
85   tu_cs_emit(&cmdbuf->cs, 0x0);
86
87   tu_cs_emit_wfi(&cmdbuf->cs);
88
89   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1);
90   tu_cs_emit(&cmdbuf->cs, 0x10000000);
91}
92
93static void
94tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
95               struct tu_bo *src_bo,
96               uint64_t src_offset,
97               struct tu_bo *dst_bo,
98               uint64_t dst_offset,
99               uint64_t size)
100{
101   const unsigned max_size_per_iter = 0x4000 - 0x40;
102   const unsigned max_iterations =
103      (size + max_size_per_iter) / max_size_per_iter;
104
105   tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ);
106   tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE);
107
108   tu_dma_prepare(cmdbuf);
109
110   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations);
111
112   /* buffer copy setup */
113   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
114   tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
115
116   const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
117
118   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
119   tu_cs_emit(&cmdbuf->cs, blit_cntl);
120
121   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
122   tu_cs_emit(&cmdbuf->cs, blit_cntl);
123
124   for (; size;) {
125      uint64_t src_va = src_bo->iova + src_offset;
126      uint64_t dst_va = dst_bo->iova + dst_offset;
127
128      unsigned src_shift = src_va & 0x3f;
129      unsigned dst_shift = dst_va & 0x3f;
130      unsigned max_shift = MAX2(src_shift, dst_shift);
131
132      src_va -= src_shift;
133      dst_va -= dst_shift;
134
135      uint32_t size_todo = MIN2(0x4000 - max_shift, size);
136      unsigned pitch = (size_todo + max_shift + 63) & ~63;
137
138      /*
139       * Emit source:
140       */
141      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
142      tu_cs_emit(&cmdbuf->cs,
143                 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
144                    A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
145                    A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
146      tu_cs_emit(&cmdbuf->cs,
147                 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) |
148                    A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
149      tu_cs_emit_qw(&cmdbuf->cs, src_va);
150      tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
151
152      tu_cs_emit(&cmdbuf->cs, 0x00000000);
153      tu_cs_emit(&cmdbuf->cs, 0x00000000);
154      tu_cs_emit(&cmdbuf->cs, 0x00000000);
155      tu_cs_emit(&cmdbuf->cs, 0x00000000);
156      tu_cs_emit(&cmdbuf->cs, 0x00000000);
157
158      tu_cs_emit(&cmdbuf->cs, 0x00000000);
159      tu_cs_emit(&cmdbuf->cs, 0x00000000);
160      tu_cs_emit(&cmdbuf->cs, 0x00000000);
161
162      /*
163       * Emit destination:
164       */
165      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
166      tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
167                                 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
168                                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
169      tu_cs_emit_qw(&cmdbuf->cs, dst_va);
170
171      tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch));
172      tu_cs_emit(&cmdbuf->cs, 0x00000000);
173      tu_cs_emit(&cmdbuf->cs, 0x00000000);
174      tu_cs_emit(&cmdbuf->cs, 0x00000000);
175      tu_cs_emit(&cmdbuf->cs, 0x00000000);
176      tu_cs_emit(&cmdbuf->cs, 0x00000000);
177
178      /*
179       * Blit command:
180       */
181      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
182      tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift));
183      tu_cs_emit(&cmdbuf->cs,
184                 A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1));
185      tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
186      tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0));
187
188      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
189      tu_cs_emit(&cmdbuf->cs,
190                 A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0));
191      tu_cs_emit(&cmdbuf->cs,
192                 A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) |
193                    A6XX_GRAS_2D_DST_BR_Y(0));
194
195      tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
196      tu_cs_emit(&cmdbuf->cs, 0x3f);
197      tu_cs_emit_wfi(&cmdbuf->cs);
198
199      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
200      tu_cs_emit(&cmdbuf->cs, 0);
201
202      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
203      tu_cs_emit(&cmdbuf->cs, 0xf180);
204
205      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
206      tu_cs_emit(&cmdbuf->cs, 0x01000000);
207
208      tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
209      tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
210
211      tu_cs_emit_wfi(&cmdbuf->cs);
212
213      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
214      tu_cs_emit(&cmdbuf->cs, 0);
215
216      src_offset += size_todo;
217      dst_offset += size_todo;
218      size -= size_todo;
219   }
220
221   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
222   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
223   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
224}
225
226static void
227tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf,
228                             struct tu_buffer *src_buffer,
229                             struct tu_image *dst_image,
230                             const VkBufferImageCopy *copy_info,
231                             VkFormat format,
232                             uint32_t layer,
233                             uint64_t src_va)
234{
235   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
236
237   uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset;
238   unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch *
239                        vk_format_get_blocksize(format);
240
241   unsigned src_pitch;
242   unsigned src_offset = 0;
243   if (copy_info->imageExtent.height == 1) {
244      /* Can't find this in the spec, but not having it is sort of insane? */
245      assert(src_va % vk_format_get_blocksize(format) == 0);
246
247      src_offset = (src_va & 63) / vk_format_get_blocksize(format);
248      src_va &= ~63;
249
250      src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
251   } else {
252      unsigned src_pixel_stride = copy_info->bufferRowLength
253                                  ? copy_info->bufferRowLength
254                                  : copy_info->imageExtent.width;
255      src_pitch = src_pixel_stride * vk_format_get_blocksize(format);
256      assert(!(src_pitch & 63));
257      assert(!(src_va & 63));
258   }
259
260   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
261
262   /*
263    * Emit source:
264    */
265   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
266   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
267                              A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
268                              A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
269                              0x500000);
270   tu_cs_emit(&cmdbuf->cs,
271              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) |
272                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
273                    copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */
274   tu_cs_emit_qw(&cmdbuf->cs, src_va);
275   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
276
277   tu_cs_emit(&cmdbuf->cs, 0x00000000);
278   tu_cs_emit(&cmdbuf->cs, 0x00000000);
279   tu_cs_emit(&cmdbuf->cs, 0x00000000);
280   tu_cs_emit(&cmdbuf->cs, 0x00000000);
281   tu_cs_emit(&cmdbuf->cs, 0x00000000);
282
283   tu_cs_emit(&cmdbuf->cs, 0x00000000);
284   tu_cs_emit(&cmdbuf->cs, 0x00000000);
285   tu_cs_emit(&cmdbuf->cs, 0x00000000);
286
287   /*
288    * Emit destination:
289    */
290   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
291   tu_cs_emit(&cmdbuf->cs,
292              A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
293                 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
294                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
295   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
296   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
297   tu_cs_emit(&cmdbuf->cs, 0x00000000);
298   tu_cs_emit(&cmdbuf->cs, 0x00000000);
299   tu_cs_emit(&cmdbuf->cs, 0x00000000);
300   tu_cs_emit(&cmdbuf->cs, 0x00000000);
301   tu_cs_emit(&cmdbuf->cs, 0x00000000);
302
303   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
304   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset));
305   tu_cs_emit(&cmdbuf->cs,
306              A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1));
307   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
308   tu_cs_emit(&cmdbuf->cs,
309              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1));
310
311   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
312   tu_cs_emit(&cmdbuf->cs,
313              A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) |
314                 A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y));
315   tu_cs_emit(&cmdbuf->cs,
316              A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x +
317                                    copy_info->imageExtent.width - 1) |
318                 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y +
319                                       copy_info->imageExtent.height - 1));
320
321   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
322   tu_cs_emit(&cmdbuf->cs, 0x3f);
323   tu_cs_emit_wfi(&cmdbuf->cs);
324
325   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
326   tu_cs_emit(&cmdbuf->cs, 0);
327
328   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
329   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
330
331   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
332   tu_cs_emit(&cmdbuf->cs, 0x01000000);
333
334   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
335   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
336
337   tu_cs_emit_wfi(&cmdbuf->cs);
338
339   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
340   tu_cs_emit(&cmdbuf->cs, 0);
341}
342
343static void
344tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
345                        struct tu_buffer *src_buffer,
346                        struct tu_image *dst_image,
347                        const VkBufferImageCopy *copy_info)
348{
349   tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
350   tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
351
352   /* general setup */
353   tu_dma_prepare(cmdbuf);
354
355   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
356
357   /* buffer copy setup */
358   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
359   tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
360
361   VkFormat format = dst_image->vk_format;
362   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
363
364   const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
365
366   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
367   tu_cs_emit(&cmdbuf->cs, blit_cntl);
368
369   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
370   tu_cs_emit(&cmdbuf->cs, blit_cntl);
371
372   unsigned src_pixel_stride = copy_info->bufferRowLength
373                                  ? copy_info->bufferRowLength
374                                  : copy_info->imageExtent.width;
375   unsigned cpp = vk_format_get_blocksize(format);
376   unsigned src_pitch = src_pixel_stride * cpp;
377
378   for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
379      unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
380      uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch;
381
382      if ((src_pitch & 63) || (src_va & 63)) {
383         /* Do a per line copy */
384         VkBufferImageCopy line_copy_info = *copy_info;
385         line_copy_info.imageExtent.height = 1;
386         for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
387            /*
388             * if src_va is not aligned the line copy will need to adjust. Give it
389             * room to do so.
390             */
391            unsigned max_width = 16384 - (src_va & 0x3f) ? 64 : 0;
392            line_copy_info.imageOffset.x = copy_info->imageOffset.x;
393            line_copy_info.imageExtent.width = copy_info->imageExtent.width;
394
395            for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
396               tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp);
397
398               line_copy_info.imageOffset.x += max_width;
399               line_copy_info.imageExtent.width -= max_width;
400            }
401
402            line_copy_info.imageOffset.y++;
403            src_va += src_pitch;
404         }
405      } else {
406         tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va);
407      }
408   }
409
410   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
411
412   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
413   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
414   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
415}
416
417static void
418tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
419                             struct tu_image *src_image,
420                             struct tu_buffer *dst_buffer,
421                             const VkBufferImageCopy *copy_info,
422                             VkFormat format,
423                             uint32_t layer,
424                             uint64_t dst_va)
425{
426   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
427
428   uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
429   unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
430                        vk_format_get_blocksize(format);
431
432   unsigned dst_pitch;
433   unsigned dst_offset = 0;
434   if (copy_info->imageExtent.height == 1) {
435      /* Can't find this in the spec, but not having it is sort of insane? */
436      assert(dst_va % vk_format_get_blocksize(format) == 0);
437
438      dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
439      dst_va &= ~63;
440
441      dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
442   } else {
443      unsigned dst_pixel_stride = copy_info->bufferRowLength
444                                  ? copy_info->bufferRowLength
445                                  : copy_info->imageExtent.width;
446      dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format);
447      assert(!(dst_pitch & 63));
448      assert(!(dst_va & 63));
449   }
450
451
452   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
453
454   /*
455    * Emit source:
456    */
457   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
458   tu_cs_emit(&cmdbuf->cs,
459              A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
460                 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
461                 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
462   tu_cs_emit(&cmdbuf->cs,
463              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
464                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
465                    src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
466   tu_cs_emit_qw(&cmdbuf->cs, src_va);
467   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
468
469   tu_cs_emit(&cmdbuf->cs, 0x00000000);
470   tu_cs_emit(&cmdbuf->cs, 0x00000000);
471   tu_cs_emit(&cmdbuf->cs, 0x00000000);
472   tu_cs_emit(&cmdbuf->cs, 0x00000000);
473   tu_cs_emit(&cmdbuf->cs, 0x00000000);
474
475   tu_cs_emit(&cmdbuf->cs, 0x00000000);
476   tu_cs_emit(&cmdbuf->cs, 0x00000000);
477   tu_cs_emit(&cmdbuf->cs, 0x00000000);
478
479   /*
480    * Emit destination:
481    */
482   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
483   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
484                              A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
485                              A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
486   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
487   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
488   tu_cs_emit(&cmdbuf->cs, 0x00000000);
489   tu_cs_emit(&cmdbuf->cs, 0x00000000);
490   tu_cs_emit(&cmdbuf->cs, 0x00000000);
491   tu_cs_emit(&cmdbuf->cs, 0x00000000);
492   tu_cs_emit(&cmdbuf->cs, 0x00000000);
493
494   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
495   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
496   tu_cs_emit(&cmdbuf->cs,
497              A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
498                                      copy_info->imageExtent.width - 1));
499   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
500   tu_cs_emit(&cmdbuf->cs,
501              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
502                                      copy_info->imageExtent.height - 1));
503
504   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
505   tu_cs_emit(&cmdbuf->cs,
506              A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0));
507   tu_cs_emit(&cmdbuf->cs,
508              A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) |
509                 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));
510
511   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
512   tu_cs_emit(&cmdbuf->cs, 0x3f);
513   tu_cs_emit_wfi(&cmdbuf->cs);
514
515   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
516   tu_cs_emit(&cmdbuf->cs, 0);
517
518   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
519   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
520
521   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
522   tu_cs_emit(&cmdbuf->cs, 0x01000000);
523
524   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
525   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
526
527   tu_cs_emit_wfi(&cmdbuf->cs);
528
529   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
530   tu_cs_emit(&cmdbuf->cs, 0);
531}
532
533static void
534tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
535                        struct tu_image *src_image,
536                        struct tu_buffer *dst_buffer,
537                        const VkBufferImageCopy *copy_info)
538{
539   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
540   tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
541
542   /* general setup */
543   tu_dma_prepare(cmdbuf);
544
545   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
546
547   /* buffer copy setup */
548   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
549   tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
550
551   VkFormat format = src_image->vk_format;
552   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
553
554   unsigned dst_pixel_stride = copy_info->bufferRowLength
555                                  ? copy_info->bufferRowLength
556                                  : copy_info->imageExtent.width;
557   unsigned cpp = vk_format_get_blocksize(format);
558   unsigned dst_pitch = dst_pixel_stride * cpp;
559
560
561   const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
562
563   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
564   tu_cs_emit(&cmdbuf->cs, blit_cntl);
565
566   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
567   tu_cs_emit(&cmdbuf->cs, blit_cntl);
568
569   for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
570       unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
571       uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
572
573       if ((dst_pitch & 63) || (dst_va & 63)) {
574         /* Do a per line copy */
575         VkBufferImageCopy line_copy_info = *copy_info;
576         line_copy_info.imageExtent.height = 1;
577         for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
578            /*
579             * if dst_va is not aligned the line copy will need to adjust. Give it
580             * room to do so.
581             */
582            unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0;
583            line_copy_info.imageOffset.x = copy_info->imageOffset.x;
584            line_copy_info.imageExtent.width = copy_info->imageExtent.width;
585
586            for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
587               tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
588
589               line_copy_info.imageOffset.x += max_width;
590               line_copy_info.imageExtent.width -= max_width;
591            }
592
593            line_copy_info.imageOffset.y++;
594            dst_va += dst_pitch;
595         }
596      } else {
597         tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
598      }
599   }
600
601   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
602
603   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
604   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
605   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
606}
607
608void
609tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
610                 VkBuffer srcBuffer,
611                 VkBuffer destBuffer,
612                 uint32_t regionCount,
613                 const VkBufferCopy *pRegions)
614{
615   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
616   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
617   TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
618
619   for (unsigned i = 0; i < regionCount; ++i) {
620      uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset;
621      uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset;
622
623      tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo,
624                     dst_offset, pRegions[i].size);
625   }
626}
627
628void
629tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
630                        VkBuffer srcBuffer,
631                        VkImage destImage,
632                        VkImageLayout destImageLayout,
633                        uint32_t regionCount,
634                        const VkBufferImageCopy *pRegions)
635{
636   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
637   TU_FROM_HANDLE(tu_image, dest_image, destImage);
638   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
639
640   for (unsigned i = 0; i < regionCount; ++i) {
641      tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
642                              pRegions + i);
643   }
644}
645
646void
647tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
648                        VkImage srcImage,
649                        VkImageLayout srcImageLayout,
650                        VkBuffer destBuffer,
651                        uint32_t regionCount,
652                        const VkBufferImageCopy *pRegions)
653{
654   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
655   TU_FROM_HANDLE(tu_image, src_image, srcImage);
656   TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
657
658   for (unsigned i = 0; i < regionCount; ++i) {
659      tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer,
660                              pRegions + i);
661   }
662}
663
664static void
665meta_copy_image(struct tu_cmd_buffer *cmd_buffer,
666                struct tu_image *src_image,
667                VkImageLayout src_image_layout,
668                struct tu_image *dest_image,
669                VkImageLayout dest_image_layout,
670                uint32_t regionCount,
671                const VkImageCopy *pRegions)
672{
673}
674
675void
676tu_CmdCopyImage(VkCommandBuffer commandBuffer,
677                VkImage srcImage,
678                VkImageLayout srcImageLayout,
679                VkImage destImage,
680                VkImageLayout destImageLayout,
681                uint32_t regionCount,
682                const VkImageCopy *pRegions)
683{
684   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
685   TU_FROM_HANDLE(tu_image, src_image, srcImage);
686   TU_FROM_HANDLE(tu_image, dest_image, destImage);
687
688   meta_copy_image(cmd_buffer, src_image, srcImageLayout, dest_image,
689                   destImageLayout, regionCount, pRegions);
690}
691