1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28#include "tu_private.h"
29
30#include "registers/adreno_pm4.xml.h"
31#include "registers/adreno_common.xml.h"
32#include "registers/a6xx.xml.h"
33
34#include "vk_format.h"
35
36#include "tu_cs.h"
37
38void
39tu_bo_list_init(struct tu_bo_list *list)
40{
41   list->count = list->capacity = 0;
42   list->bo_infos = NULL;
43}
44
45void
46tu_bo_list_destroy(struct tu_bo_list *list)
47{
48   free(list->bo_infos);
49}
50
/* Empty \a list while keeping its allocated storage for reuse. */
void
tu_bo_list_reset(struct tu_bo_list *list)
{
   list->count = 0;
}
56
57/**
58 * \a flags consists of MSM_SUBMIT_BO_FLAGS.
59 */
60static uint32_t
61tu_bo_list_add_info(struct tu_bo_list *list,
62                    const struct drm_msm_gem_submit_bo *bo_info)
63{
64   for (uint32_t i = 0; i < list->count; ++i) {
65      if (list->bo_infos[i].handle == bo_info->handle) {
66         assert(list->bo_infos[i].presumed == bo_info->presumed);
67         list->bo_infos[i].flags |= bo_info->flags;
68         return i;
69      }
70   }
71
72   /* grow list->bo_infos if needed */
73   if (list->count == list->capacity) {
74      uint32_t new_capacity = MAX2(2 * list->count, 16);
75      struct drm_msm_gem_submit_bo *new_bo_infos = realloc(
76         list->bo_infos, new_capacity * sizeof(struct drm_msm_gem_submit_bo));
77      if (!new_bo_infos)
78         return TU_BO_LIST_FAILED;
79      list->bo_infos = new_bo_infos;
80      list->capacity = new_capacity;
81   }
82
83   list->bo_infos[list->count] = *bo_info;
84   return list->count++;
85}
86
87uint32_t
88tu_bo_list_add(struct tu_bo_list *list,
89               const struct tu_bo *bo,
90               uint32_t flags)
91{
92   return tu_bo_list_add_info(list, &(struct drm_msm_gem_submit_bo) {
93                                       .flags = flags,
94                                       .handle = bo->gem_handle,
95                                       .presumed = bo->iova,
96                                    });
97}
98
99VkResult
100tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
101{
102   for (uint32_t i = 0; i < other->count; i++) {
103      if (tu_bo_list_add_info(list, other->bo_infos + i) == TU_BO_LIST_FAILED)
104         return VK_ERROR_OUT_OF_HOST_MEMORY;
105   }
106
107   return VK_SUCCESS;
108}
109
110static VkResult
111tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling,
112                                    const struct tu_device *dev)
113{
114   const uint32_t gmem_size = dev->physical_device->gmem_size;
115   uint32_t offset = 0;
116
117   for (uint32_t i = 0; i < tiling->buffer_count; i++) {
118      /* 16KB-aligned */
119      offset = align(offset, 0x4000);
120
121      tiling->gmem_offsets[i] = offset;
122      offset += tiling->tile0.extent.width * tiling->tile0.extent.height *
123                tiling->buffer_cpp[i];
124   }
125
126   return offset <= gmem_size ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
127}
128
/* Compute the tile grid (tile0 extent and tile_count) for the current
 * render area, such that a tile respects the hw width limit and all
 * attachment buffers for one tile fit in GMEM.
 */
static void
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
                                    const struct tu_device *dev)
{
   const uint32_t tile_align_w = dev->physical_device->tile_align_w;
   const uint32_t tile_align_h = dev->physical_device->tile_align_h;
   const uint32_t max_tile_width = 1024; /* A6xx */

   /* align the render area origin down to the tile alignment
    * (NOTE(review): the mask assumes tile_align_w/h are powers of two —
    * confirm against the physical-device values)
    */
   tiling->tile0.offset = (VkOffset2D) {
      .x = tiling->render_area.offset.x & ~(tile_align_w - 1),
      .y = tiling->render_area.offset.y & ~(tile_align_h - 1),
   };

   /* render area extent measured from the aligned origin */
   const uint32_t ra_width =
      tiling->render_area.extent.width +
      (tiling->render_area.offset.x - tiling->tile0.offset.x);
   const uint32_t ra_height =
      tiling->render_area.extent.height +
      (tiling->render_area.offset.y - tiling->tile0.offset.y);

   /* start from 1 tile */
   tiling->tile_count = (VkExtent2D) {
      .width = 1,
      .height = 1,
   };
   tiling->tile0.extent = (VkExtent2D) {
      .width = align(ra_width, tile_align_w),
      .height = align(ra_height, tile_align_h),
   };

   /* do not exceed max tile width */
   while (tiling->tile0.extent.width > max_tile_width) {
      tiling->tile_count.width++;
      tiling->tile0.extent.width =
         align(ra_width / tiling->tile_count.width, tile_align_w);
   }

   /* do not exceed gmem size: keep splitting along the longer tile
    * dimension until the per-tile GMEM layout fits
    * (NOTE(review): assumes this converges before the tile shrinks to the
    * alignment minimum — confirm for very small GMEM configurations)
    */
   while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) {
      if (tiling->tile0.extent.width > tiling->tile0.extent.height) {
         tiling->tile_count.width++;
         tiling->tile0.extent.width =
            align(ra_width / tiling->tile_count.width, tile_align_w);
      } else {
         tiling->tile_count.height++;
         tiling->tile0.extent.height =
            align(ra_height / tiling->tile_count.height, tile_align_h);
      }
   }
}
179
180static void
181tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
182                                    const struct tu_device *dev)
183{
184   const uint32_t max_pipe_count = 32; /* A6xx */
185
186   /* start from 1 tile per pipe */
187   tiling->pipe0 = (VkExtent2D) {
188      .width = 1,
189      .height = 1,
190   };
191   tiling->pipe_count = tiling->tile_count;
192
193   /* do not exceed max pipe count vertically */
194   while (tiling->pipe_count.height > max_pipe_count) {
195      tiling->pipe0.height += 2;
196      tiling->pipe_count.height =
197         (tiling->tile_count.height + tiling->pipe0.height - 1) /
198         tiling->pipe0.height;
199   }
200
201   /* do not exceed max pipe count */
202   while (tiling->pipe_count.width * tiling->pipe_count.height >
203          max_pipe_count) {
204      tiling->pipe0.width += 1;
205      tiling->pipe_count.width =
206         (tiling->tile_count.width + tiling->pipe0.width - 1) /
207         tiling->pipe0.width;
208   }
209}
210
211static void
212tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
213                              const struct tu_device *dev)
214{
215   const uint32_t max_pipe_count = 32; /* A6xx */
216   const uint32_t used_pipe_count =
217      tiling->pipe_count.width * tiling->pipe_count.height;
218   const VkExtent2D last_pipe = {
219      .width = tiling->tile_count.width % tiling->pipe0.width,
220      .height = tiling->tile_count.height % tiling->pipe0.height,
221   };
222
223   assert(used_pipe_count <= max_pipe_count);
224   assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
225
226   for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
227      for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
228         const uint32_t pipe_x = tiling->pipe0.width * x;
229         const uint32_t pipe_y = tiling->pipe0.height * y;
230         const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
231                                    ? last_pipe.width
232                                    : tiling->pipe0.width;
233         const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
234                                    ? last_pipe.height
235                                    : tiling->pipe0.height;
236         const uint32_t n = tiling->pipe_count.width * y + x;
237
238         tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
239                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
240                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
241                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
242         tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
243      }
244   }
245
246   memset(tiling->pipe_config + used_pipe_count, 0,
247          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
248}
249
250static void
251tu_tiling_config_update(struct tu_tiling_config *tiling,
252                        const struct tu_device *dev,
253                        const uint32_t *buffer_cpp,
254                        uint32_t buffer_count,
255                        const VkRect2D *render_area)
256{
257   /* see if there is any real change */
258   const bool ra_changed =
259      render_area &&
260      memcmp(&tiling->render_area, render_area, sizeof(*render_area));
261   const bool buf_changed = tiling->buffer_count != buffer_count ||
262                            memcmp(tiling->buffer_cpp, buffer_cpp,
263                                   sizeof(*buffer_cpp) * buffer_count);
264   if (!ra_changed && !buf_changed)
265      return;
266
267   if (ra_changed)
268      tiling->render_area = *render_area;
269
270   if (buf_changed) {
271      memcpy(tiling->buffer_cpp, buffer_cpp,
272             sizeof(*buffer_cpp) * buffer_count);
273      tiling->buffer_count = buffer_count;
274   }
275
276   tu_tiling_config_update_tile_layout(tiling, dev);
277   tu_tiling_config_update_pipe_layout(tiling, dev);
278   tu_tiling_config_update_pipes(tiling, dev);
279}
280
281static void
282tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
283                          const struct tu_device *dev,
284                          uint32_t tx,
285                          uint32_t ty,
286                          struct tu_tile *tile)
287{
288   /* find the pipe and the slot for tile (tx, ty) */
289   const uint32_t px = tx / tiling->pipe0.width;
290   const uint32_t py = ty / tiling->pipe0.height;
291   const uint32_t sx = tx - tiling->pipe0.width * px;
292   const uint32_t sy = ty - tiling->pipe0.height * py;
293
294   assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
295   assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
296   assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
297
298   /* convert to 1D indices */
299   tile->pipe = tiling->pipe_count.width * py + px;
300   tile->slot = tiling->pipe0.width * sy + sx;
301
302   /* get the blit area for the tile */
303   tile->begin = (VkOffset2D) {
304      .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
305      .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
306   };
307   tile->end.x =
308      (tx == tiling->tile_count.width - 1)
309         ? tiling->render_area.offset.x + tiling->render_area.extent.width
310         : tile->begin.x + tiling->tile0.extent.width;
311   tile->end.y =
312      (ty == tiling->tile_count.height - 1)
313         ? tiling->render_area.offset.y + tiling->render_area.extent.height
314         : tile->begin.y + tiling->tile0.extent.height;
315}
316
317static enum a3xx_msaa_samples
318tu6_msaa_samples(uint32_t samples)
319{
320   switch (samples) {
321   case 1:
322      return MSAA_ONE;
323   case 2:
324      return MSAA_TWO;
325   case 4:
326      return MSAA_FOUR;
327   case 8:
328      return MSAA_EIGHT;
329   default:
330      assert(!"invalid sample count");
331      return MSAA_ONE;
332   }
333}
334
335static enum a4xx_index_size
336tu6_index_size(VkIndexType type)
337{
338   switch (type) {
339   case VK_INDEX_TYPE_UINT16:
340      return INDEX4_SIZE_16_BIT;
341   case VK_INDEX_TYPE_UINT32:
342      return INDEX4_SIZE_32_BIT;
343   default:
344      unreachable("invalid VkIndexType");
345      return INDEX4_SIZE_8_BIT;
346   }
347}
348
/* Write an incrementing sequence number to the command buffer's marker
 * register, for tracking command-stream progress.
 */
static void
tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_write_reg(cs, cmd->marker_reg, ++cmd->marker_seqno);
}
354
/* Emit a CP_EVENT_WRITE packet for \a event.  When \a need_seqno is set,
 * the packet additionally writes an incrementing sequence number to the
 * command buffer's scratch BO.
 */
void
tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     enum vgt_event_type event,
                     bool need_seqno)
{
   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1);
   tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event));
   if (need_seqno) {
      /* 64-bit destination address followed by the value to write */
      tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
      tu_cs_emit(cs, ++cmd->scratch_seqno);
   }
}
368
/* Emit a cache flush/invalidate event.
 *
 * NOTE(review): 0x31 is a raw event number with no named enum here —
 * presumably the a6xx cache-invalidate event; confirm against the a6xx
 * vgt_event_type list.
 */
static void
tu6_emit_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu6_emit_event_write(cmd, cs, 0x31, false);
}
374
/* Emit an LRZ_FLUSH event (low-resolution-Z buffer flush). */
static void
tu6_emit_lrz_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu6_emit_event_write(cmd, cs, LRZ_FLUSH, false);
}
380
/* Emit a wait-for-idle if one is pending on the command buffer; the flag
 * is cleared so consecutive calls emit at most one WFI.
 */
static void
tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   if (cmd->wait_for_idle) {
      tu_cs_emit_wfi(cs);
      cmd->wait_for_idle = false;
   }
}
389
/* Emit the depth/stencil buffer state for the current subpass.
 *
 * Currently only the "no depth/stencil attachment" case is implemented:
 * all related registers are programmed to their disabled/zero state.
 */
static void
tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_subpass *subpass = cmd->state.subpass;

   const uint32_t a = subpass->depth_stencil_attachment.attachment;
   if (a == VK_ATTACHMENT_UNUSED) {
      /* no depth buffer: format DEPTH6_NONE, all addresses/pitches zero */
      tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
      tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */

      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      tu_cs_emit(cs,
                 A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));

      /* disable the LRZ buffer as well */
      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
      tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
      tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */

      return;
   }

   /* enable zs? */
}
424
/* Emit the render-target (MRT) state for the current subpass: per-target
 * buffer info (format, tiling, address, GMEM offset), the sRGB enable
 * mask, and the render-component masks.
 */
static void
tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   unsigned char mrt_comp[MAX_RTS] = { 0 };
   unsigned srgb_cntl = 0;

   /* gmem_index counts only the used attachments, so GMEM offsets are
    * assigned densely even when some color attachments are unused
    */
   uint32_t gmem_index = 0;
   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      uint32_t a = subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview = fb->attachments[a].attachment;
      const struct tu_image_level *slice =
         &iview->image->levels[iview->base_mip];
      /* only linear tiling is supported so far */
      const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
      uint32_t stride = 0;
      uint32_t offset = 0;

      /* enable all four components for this RT */
      mrt_comp[i] = 0xf;

      if (vk_format_is_srgb(iview->vk_format))
         srgb_cntl |= (1 << i);

      const struct tu_native_format *format =
         tu6_get_native_format(iview->vk_format);
      assert(format && format->rb >= 0);

      offset = slice->offset + slice->size * iview->base_layer;
      stride = slice->pitch * vk_format_get_blocksize(iview->vk_format);

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
      tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
                        A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
                        A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride));
      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size));
      tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset +
                           offset); /* BASE_LO/HI */
      tu_cs_emit(
         cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */

      tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
      tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb));

#if 0
      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4);
      tu_cs_emit(cs, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      tu_cs_emit(cs, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
#endif
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
   tu_cs_emit(cs, srgb_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_SRGB_CNTL, 1);
   tu_cs_emit(cs, srgb_cntl);

   /* component-write masks for all 8 RT slots (unused slots are 0) */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_COMPONENTS, 1);
   tu_cs_emit(cs, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
                     A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
                     A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
                     A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
                     A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
                     A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
                     A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
                     A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1);
   tu_cs_emit(cs, A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
                     A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7]));
}
511
/* Emit the MSAA sample-count state for the SP_TP, GRAS, and RB blocks.
 * Each block gets a RAS and a DEST control; the DEST control also carries
 * an explicit MSAA-disable bit for the single-sample case.
 */
static void
tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   const enum a3xx_msaa_samples samples =
      tu6_msaa_samples(subpass->max_sample_count);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   tu_cs_emit(
      cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
             ((samples == MSAA_ONE) ? A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE
                                    : 0));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   tu_cs_emit(
      cs,
      A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
         ((samples == MSAA_ONE) ? A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE : 0));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   tu_cs_emit(
      cs,
      A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
         ((samples == MSAA_ONE) ? A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE : 0));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1);
   tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}
543
/* Emit the bin (tile) size to the GRAS and RB bin-control registers,
 * OR-ing in the caller-supplied mode \a flags where supported.
 */
static void
tu6_emit_bin_size(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t flags)
{
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   const uint32_t bin_w = tiling->tile0.extent.width;
   const uint32_t bin_h = tiling->tile0.extent.height;

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_BIN_CONTROL, 1);
   tu_cs_emit(cs, A6XX_GRAS_BIN_CONTROL_BINW(bin_w) |
                     A6XX_GRAS_BIN_CONTROL_BINH(bin_h) | flags);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL, 1);
   tu_cs_emit(cs, A6XX_RB_BIN_CONTROL_BINW(bin_w) |
                     A6XX_RB_BIN_CONTROL_BINH(bin_h) | flags);

   /* no flag for RB_BIN_CONTROL2... */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL2, 1);
   tu_cs_emit(cs, A6XX_RB_BIN_CONTROL2_BINW(bin_w) |
                     A6XX_RB_BIN_CONTROL2_BINH(bin_h));
}
564
/* Emit RB_RENDER_CNTL via CP_REG_WRITE, optionally setting the binning
 * bit for the binning pass.
 *
 * NOTE(review): the register is written through CP_REG_WRITE with mode 0x2
 * rather than a plain pkt4 — presumably required for this register on
 * a6xx; confirm against the firmware docs.
 */
static void
tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     bool binning)
{
   uint32_t cntl = 0;
   cntl |= A6XX_RB_RENDER_CNTL_UNK4;
   if (binning)
      cntl |= A6XX_RB_RENDER_CNTL_BINNING;

   tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
   tu_cs_emit(cs, 0x2);
   tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
   tu_cs_emit(cs, cntl);
}
580
581static void
582tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
583{
584   const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
585   const uint32_t x1 = render_area->offset.x;
586   const uint32_t y1 = render_area->offset.y;
587   const uint32_t x2 = x1 + render_area->extent.width - 1;
588   const uint32_t y2 = y1 + render_area->extent.height - 1;
589
590   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
591   tu_cs_emit(cs,
592              A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
593   tu_cs_emit(cs,
594              A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
595}
596
/* Emit the state for a GMEM<->sysmem blit of \a iview: blit info flags,
 * destination format/address/pitch, and the GMEM base offset.  Whether
 * this is a load or a store is determined by \a blit_info.
 */
static void
tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
                   struct tu_cs *cs,
                   const struct tu_image_view *iview,
                   uint32_t gmem_offset,
                   uint32_t blit_info)
{
   const struct tu_image_level *slice =
      &iview->image->levels[iview->base_mip];
   const uint32_t offset = slice->offset + slice->size * iview->base_layer;
   const uint32_t stride =
      slice->pitch * vk_format_get_blocksize(iview->vk_format);
   /* only linear tiling and single-sampled blits are handled so far */
   const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
   const enum a3xx_msaa_samples samples = tu6_msaa_samples(1);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
   tu_cs_emit(cs, blit_info);

   /* tile mode? */
   const struct tu_native_format *format =
      tu6_get_native_format(iview->vk_format);
   assert(format && format->rb >= 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap));
   tu_cs_emit_qw(cs,
                 iview->image->bo->iova + iview->image->bo_offset + offset);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride));
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, gmem_offset);
}
633
/* Emit the state for clearing \a iview's GMEM slice to \a clear_value:
 * destination info, clear-mode blit info, GMEM base, and the packed clear
 * color.
 */
static void
tu6_emit_blit_clear(struct tu_cmd_buffer *cmd,
                    struct tu_cs *cs,
                    const struct tu_image_view *iview,
                    uint32_t gmem_offset,
                    const VkClearValue *clear_value)
{
   /* only linear tiling and single-sampled clears are handled so far */
   const enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
   const enum a3xx_msaa_samples samples = tu6_msaa_samples(1);

   const struct tu_native_format *format =
      tu6_get_native_format(iview->vk_format);
   assert(format && format->rb >= 0);
   /* must be WZYX; other values are ignored */
   const enum a3xx_color_swap swap = WZYX;

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap));

   /* clear all four components directly in GMEM */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, gmem_offset);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
   tu_cs_emit(cs, 0);

   /* pack clear_value into WZYX order */
   uint32_t clear_vals[4] = { 0 };
   tu_pack_clear_value(clear_value, iview->vk_format, clear_vals);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
   tu_cs_emit(cs, clear_vals[0]);
   tu_cs_emit(cs, clear_vals[1]);
   tu_cs_emit(cs, clear_vals[2]);
   tu_cs_emit(cs, clear_vals[3]);
}
675
/* Kick the blit configured by the preceding RB_BLIT_* state, bracketed by
 * progress markers.
 */
static void
tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu6_emit_marker(cmd, cs);
   tu6_emit_event_write(cmd, cs, BLIT, false);
   tu6_emit_marker(cmd, cs);
}
683
/* Emit the window scissor and resolve rectangle for the region
 * (x1, y1)-(x2, y2); the BR/second coordinates are inclusive.
 */
static void
tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        uint32_t x1,
                        uint32_t y1,
                        uint32_t x2,
                        uint32_t y2)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
   tu_cs_emit(
      cs, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | A6XX_GRAS_RESOLVE_CNTL_1_Y(y1));
   tu_cs_emit(
      cs, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | A6XX_GRAS_RESOLVE_CNTL_2_Y(y2));
}
704
/* Emit the window offset (x1, y1) to each hw block that needs it (RB, SP,
 * SP_TP); used to position the current tile within the framebuffer.
 */
static void
tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
                       struct tu_cs *cs,
                       uint32_t x1,
                       uint32_t y1)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
   tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   tu_cs_emit(cs,
              A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_WINDOW_OFFSET, 1);
   tu_cs_emit(cs, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   tu_cs_emit(
      cs, A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}
725
/* Select \a tile for GMEM rendering: switch to GMEM mode, program the
 * window scissor/offset for the tile, and disable streamout and (since hw
 * binning is not implemented yet) visibility-stream culling.
 */
static void
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     const struct tu_tile *tile)
{
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(0x7));

   tu6_emit_marker(cmd, cs);
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
   tu6_emit_marker(cmd, cs);

   /* scissor/offset use inclusive coordinates, hence the -1 */
   const uint32_t x1 = tile->begin.x;
   const uint32_t y1 = tile->begin.y;
   const uint32_t x2 = tile->end.x - 1;
   const uint32_t y2 = tile->end.y - 1;
   tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
   tu6_emit_window_offset(cmd, cs, x1, y1);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_OVERRIDE, 1);
   tu_cs_emit(cs, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);

   if (false) {
      /* hw binning? */
   } else {
      /* no binning pass: override visibility so everything is drawn */
      tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
      tu_cs_emit(cs, 0x1);

      tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
      tu_cs_emit(cs, 0x0);
   }
}
759
/* Load (or clear) the current tile's color attachments into GMEM: for
 * each used attachment, either emit a GMEM clear for pending clears or
 * blit the attachment contents from system memory.
 */
static void
tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   const struct tu_attachment_state *attachments = cmd->state.attachments;

   tu6_emit_blit_scissor(cmd, cs);

   /* gmem_index must walk the used attachments in the same order as
    * tu6_emit_mrt so the GMEM offsets line up
    */
   uint32_t gmem_index = 0;
   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      const uint32_t a = subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview = fb->attachments[a].attachment;
      const struct tu_attachment_state *att = attachments + a;
      if (att->pending_clear_aspects) {
         /* only color clears are handled here */
         assert(att->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
         tu6_emit_blit_clear(cmd, cs, iview,
                             tiling->gmem_offsets[gmem_index++],
                             &att->clear_value);
      } else {
         tu6_emit_blit_info(cmd, cs, iview,
                            tiling->gmem_offsets[gmem_index++],
                            A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM);
      }

      tu6_emit_blit(cmd, cs);
   }

   /* load/clear zs? */
}
794
/* Resolve (store) the current tile from GMEM back to system memory:
 * disable any leftover draw state, switch to resolve mode, and blit each
 * used color attachment out of GMEM.
 */
static void
tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;

   if (false) {
      /* hw binning? */
   }

   /* disable all draw-state groups before the resolve */
   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   tu_cs_emit(cs, 0x0);

   tu6_emit_marker(cmd, cs);
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
   tu6_emit_marker(cmd, cs);

   tu6_emit_blit_scissor(cmd, cs);

   /* gmem_index must walk the used attachments in the same order as
    * tu6_emit_mrt so the GMEM offsets line up
    */
   uint32_t gmem_index = 0;
   for (uint32_t i = 0; i < cmd->state.subpass->color_count; ++i) {
      uint32_t a = cmd->state.subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview = fb->attachments[a].attachment;
      tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[gmem_index++],
                         0);
      tu6_emit_blit(cmd, cs);
   }
}
834
/* Emit the primitive-restart index value. */
static void
tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1);
   tu_cs_emit(cs, restart_index);
}
841
/**
 * Emit the static baseline register state into \a cs: cache flush, then a
 * long sequence of SP/HLSQ/GRAS/RB/VPC/PC defaults, stream-out disabled,
 * and LRZ cleared.  Many targets are UNKNOWN_* registers, so the values
 * are known-good magic numbers rather than documented settings.
 *
 * Called once per primary command buffer on the GENERAL queue (see
 * tu_BeginCommandBuffer).  On reservation failure the error is parked in
 * cmd->record_result and reported at vkEndCommandBuffer time.
 */
static void
tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   tu6_emit_cache_flush(cmd, cs);

   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);

   tu_cs_emit_write_reg(cs, REG_A6XX_RB_CCU_CNTL, 0x7c400004);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE0F, 0x3f);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B605, 0x44);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B600, 0x100000);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9600, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE04, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE03, 0x00000410);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BB11, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
   tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A009, 0x00000001);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);

   tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881B, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9101, 0xffff00);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, 1);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9300, 0);

   /* stream-out globally disabled by default */
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_SO_OVERRIDE,
                        A6XX_VPC_SO_OVERRIDE_SO_DISABLE);

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9801, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0);
   /* NOTE(review): duplicate back-to-back write of PC_UNKNOWN_9B06 — looks
    * like a copy/paste slip; confirm whether a neighboring register was
    * intended for the second write.
    */
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_809B, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9602, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);

   tu6_emit_marker(cmd, cs);

   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_MODE_CNTL, 0x00000000);

   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x0000001f);

   /* we don't use this yet.. probably best to disable.. */
   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   /* zero out all stream-out buffer/flush/offset state */
   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3);
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2);
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUF_CNTL, 1);
   tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUF_CNTL */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1);
   tu_cs_emit(cs, 0x00000000); /* UNKNOWN_E2AB */

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   /* no HS/GS programs bound by default */
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CTRL_REG0, 1);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CTRL_REG0, 1);
   tu_cs_emit(cs, 0x00000000);

   /* LRZ disabled until a render pass configures it */
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_LRZ_CNTL, 1);
   tu_cs_emit(cs, 0x00000000);

   tu_cs_sanity_check(cs);
}
1012
/**
 * Emit the per-render-pass preamble shared by all tiles: LRZ and cache
 * flushes, CCU configuration for GMEM rendering, and the depth/MRT/MSAA
 * state of the current subpass.  Hardware binning is not implemented yet
 * (the `if (false)` placeholder below), so a fixed bin-size value is
 * programmed instead.
 */
static void
tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
   if (result != VK_SUCCESS) {
      /* reported later, at vkEndCommandBuffer */
      cmd->record_result = result;
      return;
   }

   tu6_emit_lrz_flush(cmd, cs);

   /* lrz clear? */

   tu6_emit_cache_flush(cmd, cs);

   tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   tu_cs_emit(cs, 0x0);

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   tu6_emit_wfi(cmd, cs);
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
   tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */

   tu6_emit_zs(cmd, cs);
   tu6_emit_mrt(cmd, cs);
   tu6_emit_msaa(cmd, cs);

   if (false) {
      /* hw binning? */
   } else {
      tu6_emit_bin_size(cmd, cs, 0x6000000);
      /* no draws */
   }

   tu6_emit_render_cntl(cmd, cs, false);

   tu_cs_sanity_check(cs);
}
1051
1052static void
1053tu6_render_tile(struct tu_cmd_buffer *cmd,
1054                struct tu_cs *cs,
1055                const struct tu_tile *tile)
1056{
1057   const uint32_t render_tile_space = 64 + tu_cs_get_call_size(&cmd->draw_cs);
1058   VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space);
1059   if (result != VK_SUCCESS) {
1060      cmd->record_result = result;
1061      return;
1062   }
1063
1064   tu6_emit_tile_select(cmd, cs, tile);
1065   tu_cs_emit_ib(cs, &cmd->state.tile_load_ib);
1066
1067   tu_cs_emit_call(cs, &cmd->draw_cs);
1068   cmd->wait_for_idle = true;
1069
1070   tu_cs_emit_ib(cs, &cmd->state.tile_store_ib);
1071
1072   tu_cs_sanity_check(cs);
1073}
1074
1075static void
1076tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1077{
1078   VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
1079   if (result != VK_SUCCESS) {
1080      cmd->record_result = result;
1081      return;
1082   }
1083
1084   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
1085   tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);
1086
1087   tu6_emit_lrz_flush(cmd, cs);
1088
1089   tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
1090
1091   tu_cs_sanity_check(cs);
1092}
1093
1094static void
1095tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
1096{
1097   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1098
1099   tu6_render_begin(cmd, &cmd->cs);
1100
1101   for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
1102      for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
1103         struct tu_tile tile;
1104         tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
1105         tu6_render_tile(cmd, &cmd->cs, &tile);
1106      }
1107   }
1108
1109   tu6_render_end(cmd, &cmd->cs);
1110}
1111
1112static void
1113tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd)
1114{
1115   const uint32_t tile_load_space = 16 + 32 * MAX_RTS;
1116   const struct tu_subpass *subpass = cmd->state.subpass;
1117   struct tu_attachment_state *attachments = cmd->state.attachments;
1118   struct tu_cs sub_cs;
1119
1120   VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1121                                            tile_load_space, &sub_cs);
1122   if (result != VK_SUCCESS) {
1123      cmd->record_result = result;
1124      return;
1125   }
1126
1127   /* emit to tile-load sub_cs */
1128   tu6_emit_tile_load(cmd, &sub_cs);
1129
1130   cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1131
1132   for (uint32_t i = 0; i < subpass->color_count; ++i) {
1133      const uint32_t a = subpass->color_attachments[i].attachment;
1134      if (a != VK_ATTACHMENT_UNUSED)
1135         attachments[a].pending_clear_aspects = 0;
1136   }
1137}
1138
1139static void
1140tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
1141{
1142   const uint32_t tile_store_space = 32 + 32 * MAX_RTS;
1143   struct tu_cs sub_cs;
1144
1145   VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1146                                            tile_store_space, &sub_cs);
1147   if (result != VK_SUCCESS) {
1148      cmd->record_result = result;
1149      return;
1150   }
1151
1152   /* emit to tile-store sub_cs */
1153   tu6_emit_tile_store(cmd, &sub_cs);
1154
1155   cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1156}
1157
1158static void
1159tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
1160                            const VkRect2D *render_area)
1161{
1162   const struct tu_device *dev = cmd->device;
1163   const struct tu_render_pass *pass = cmd->state.pass;
1164   const struct tu_subpass *subpass = cmd->state.subpass;
1165   struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1166
1167   uint32_t buffer_cpp[MAX_RTS + 2];
1168   uint32_t buffer_count = 0;
1169
1170   for (uint32_t i = 0; i < subpass->color_count; ++i) {
1171      const uint32_t a = subpass->color_attachments[i].attachment;
1172      if (a == VK_ATTACHMENT_UNUSED)
1173         continue;
1174
1175      const struct tu_render_pass_attachment *att = &pass->attachments[a];
1176      buffer_cpp[buffer_count++] =
1177         vk_format_get_blocksize(att->format) * att->samples;
1178   }
1179
1180   if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
1181      const uint32_t a = subpass->depth_stencil_attachment.attachment;
1182      const struct tu_render_pass_attachment *att = &pass->attachments[a];
1183
1184      /* TODO */
1185      assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT);
1186
1187      buffer_cpp[buffer_count++] =
1188         vk_format_get_blocksize(att->format) * att->samples;
1189   }
1190
1191   tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count,
1192                           render_area);
1193}
1194
/* Default values for every Vulkan dynamic-state group.  Viewport/scissor
 * counts default to 0 because the real counts can only be specified at
 * pipeline creation time (see tu_bind_dynamic_state).
 */
const struct tu_dynamic_state default_dynamic_state = {
   .viewport =
     {
       .count = 0,
     },
   .scissor =
     {
       .count = 0,
     },
   .line_width = 1.0f,
   .depth_bias =
     {
       .bias = 0.0f,
       .clamp = 0.0f,
       .slope = 0.0f,
     },
   .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
   .depth_bounds =
     {
       .min = 0.0f,
       .max = 1.0f,
     },
   /* stencil masks default to "all bits" */
   .stencil_compare_mask =
     {
       .front = ~0u,
       .back = ~0u,
     },
   .stencil_write_mask =
     {
       .front = ~0u,
       .back = ~0u,
     },
   .stencil_reference =
     {
       .front = 0u,
       .back = 0u,
     },
};
1233
/**
 * Copy the dynamic-state groups selected by src->mask into the command
 * buffer's current state, tracking in dest_mask which groups actually
 * changed (each group is compared before copying to avoid spurious
 * dirtying).
 *
 * NOTE(review): the function is marked UNUSED/FINISHME — dest_mask is
 * accumulated but never consumed, so no dirty bits are propagated yet.
 */
static void UNUSED /* FINISHME */
tu_bind_dynamic_state(struct tu_cmd_buffer *cmd_buffer,
                      const struct tu_dynamic_state *src)
{
   struct tu_dynamic_state *dest = &cmd_buffer->state.dynamic;
   uint32_t copy_mask = src->mask;
   uint32_t dest_mask = 0;

   tu_use_args(cmd_buffer); /* FINISHME */

   /* Make sure to copy the number of viewports/scissors because they can
    * only be specified at pipeline creation time.
    */
   dest->viewport.count = src->viewport.count;
   dest->scissor.count = src->scissor.count;
   dest->discard_rectangle.count = src->discard_rectangle.count;

   if (copy_mask & TU_DYNAMIC_VIEWPORT) {
      if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
                 src->viewport.count * sizeof(VkViewport))) {
         typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
                      src->viewport.count);
         dest_mask |= TU_DYNAMIC_VIEWPORT;
      }
   }

   if (copy_mask & TU_DYNAMIC_SCISSOR) {
      if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
                 src->scissor.count * sizeof(VkRect2D))) {
         typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
                      src->scissor.count);
         dest_mask |= TU_DYNAMIC_SCISSOR;
      }
   }

   if (copy_mask & TU_DYNAMIC_LINE_WIDTH) {
      if (dest->line_width != src->line_width) {
         dest->line_width = src->line_width;
         dest_mask |= TU_DYNAMIC_LINE_WIDTH;
      }
   }

   if (copy_mask & TU_DYNAMIC_DEPTH_BIAS) {
      if (memcmp(&dest->depth_bias, &src->depth_bias,
                 sizeof(src->depth_bias))) {
         dest->depth_bias = src->depth_bias;
         dest_mask |= TU_DYNAMIC_DEPTH_BIAS;
      }
   }

   if (copy_mask & TU_DYNAMIC_BLEND_CONSTANTS) {
      if (memcmp(&dest->blend_constants, &src->blend_constants,
                 sizeof(src->blend_constants))) {
         typed_memcpy(dest->blend_constants, src->blend_constants, 4);
         dest_mask |= TU_DYNAMIC_BLEND_CONSTANTS;
      }
   }

   if (copy_mask & TU_DYNAMIC_DEPTH_BOUNDS) {
      if (memcmp(&dest->depth_bounds, &src->depth_bounds,
                 sizeof(src->depth_bounds))) {
         dest->depth_bounds = src->depth_bounds;
         dest_mask |= TU_DYNAMIC_DEPTH_BOUNDS;
      }
   }

   if (copy_mask & TU_DYNAMIC_STENCIL_COMPARE_MASK) {
      if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
                 sizeof(src->stencil_compare_mask))) {
         dest->stencil_compare_mask = src->stencil_compare_mask;
         dest_mask |= TU_DYNAMIC_STENCIL_COMPARE_MASK;
      }
   }

   if (copy_mask & TU_DYNAMIC_STENCIL_WRITE_MASK) {
      if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
                 sizeof(src->stencil_write_mask))) {
         dest->stencil_write_mask = src->stencil_write_mask;
         dest_mask |= TU_DYNAMIC_STENCIL_WRITE_MASK;
      }
   }

   if (copy_mask & TU_DYNAMIC_STENCIL_REFERENCE) {
      if (memcmp(&dest->stencil_reference, &src->stencil_reference,
                 sizeof(src->stencil_reference))) {
         dest->stencil_reference = src->stencil_reference;
         dest_mask |= TU_DYNAMIC_STENCIL_REFERENCE;
      }
   }

   if (copy_mask & TU_DYNAMIC_DISCARD_RECTANGLE) {
      if (memcmp(&dest->discard_rectangle.rectangles,
                 &src->discard_rectangle.rectangles,
                 src->discard_rectangle.count * sizeof(VkRect2D))) {
         typed_memcpy(dest->discard_rectangle.rectangles,
                      src->discard_rectangle.rectangles,
                      src->discard_rectangle.count);
         dest_mask |= TU_DYNAMIC_DISCARD_RECTANGLE;
      }
   }
}
1335
1336static VkResult
1337tu_create_cmd_buffer(struct tu_device *device,
1338                     struct tu_cmd_pool *pool,
1339                     VkCommandBufferLevel level,
1340                     VkCommandBuffer *pCommandBuffer)
1341{
1342   struct tu_cmd_buffer *cmd_buffer;
1343   cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
1344                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1345   if (cmd_buffer == NULL)
1346      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1347
1348   cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1349   cmd_buffer->device = device;
1350   cmd_buffer->pool = pool;
1351   cmd_buffer->level = level;
1352
1353   if (pool) {
1354      list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
1355      cmd_buffer->queue_family_index = pool->queue_family_index;
1356
1357   } else {
1358      /* Init the pool_link so we can safely call list_del when we destroy
1359       * the command buffer
1360       */
1361      list_inithead(&cmd_buffer->pool_link);
1362      cmd_buffer->queue_family_index = TU_QUEUE_GENERAL;
1363   }
1364
1365   tu_bo_list_init(&cmd_buffer->bo_list);
1366   tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096);
1367   tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096);
1368   tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024);
1369
1370   *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer);
1371
1372   list_inithead(&cmd_buffer->upload.list);
1373
1374   cmd_buffer->marker_reg = REG_A6XX_CP_SCRATCH_REG(
1375      cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ? 7 : 6);
1376
1377   VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000);
1378   if (result != VK_SUCCESS)
1379      return result;
1380
1381   return VK_SUCCESS;
1382}
1383
1384static void
1385tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
1386{
1387   tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
1388
1389   list_del(&cmd_buffer->pool_link);
1390
1391   for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++)
1392      free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
1393
1394   tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs);
1395   tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs);
1396   tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs);
1397
1398   tu_bo_list_destroy(&cmd_buffer->bo_list);
1399   vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
1400}
1401
1402static VkResult
1403tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
1404{
1405   cmd_buffer->wait_for_idle = true;
1406
1407   cmd_buffer->record_result = VK_SUCCESS;
1408
1409   tu_bo_list_reset(&cmd_buffer->bo_list);
1410   tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs);
1411   tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs);
1412   tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs);
1413
1414   for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
1415      cmd_buffer->descriptors[i].dirty = 0;
1416      cmd_buffer->descriptors[i].valid = 0;
1417      cmd_buffer->descriptors[i].push_dirty = false;
1418   }
1419
1420   cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;
1421
1422   return cmd_buffer->record_result;
1423}
1424
/**
 * Set up per-attachment state for a render pass begin: register every
 * framebuffer image BO for submit-time residency, allocate the
 * tu_attachment_state array, and compute which aspects of each attachment
 * need clearing based on its load ops.
 *
 * \param info may be NULL; clear values are only captured when it is set.
 * \return VK_ERROR_OUT_OF_HOST_MEMORY (also recorded in record_result) if
 *         the attachment array cannot be allocated.
 */
static VkResult
tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer,
                               const VkRenderPassBeginInfo *info)
{
   struct tu_cmd_state *state = &cmd_buffer->state;
   const struct tu_framebuffer *fb = state->framebuffer;
   const struct tu_render_pass *pass = state->pass;

   /* all attachment images may be both read and written during the pass */
   for (uint32_t i = 0; i < fb->attachment_count; ++i) {
      const struct tu_image_view *iview = fb->attachments[i].attachment;
      tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo,
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
   }

   if (pass->attachment_count == 0) {
      state->attachments = NULL;
      return VK_SUCCESS;
   }

   state->attachments =
      vk_alloc(&cmd_buffer->pool->alloc,
               pass->attachment_count * sizeof(state->attachments[0]), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (state->attachments == NULL) {
      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
      return cmd_buffer->record_result;
   }

   for (uint32_t i = 0; i < pass->attachment_count; ++i) {
      const struct tu_render_pass_attachment *att = &pass->attachments[i];
      VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
      VkImageAspectFlags clear_aspects = 0;

      if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* color attachment */
         if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
         }
      } else {
         /* depthstencil attachment */
         if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
             att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
            /* also clear DONT_CARE stencil when depth is cleared —
             * presumably because the aspects share storage; confirm
             */
            if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
                att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
               clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         }
         if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
             att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
         }
      }

      state->attachments[i].pending_clear_aspects = clear_aspects;
      state->attachments[i].cleared_views = 0;
      if (clear_aspects && info) {
         assert(info->clearValueCount > i);
         state->attachments[i].clear_value = info->pClearValues[i];
      }

      state->attachments[i].current_layout = att->initial_layout;
   }

   return VK_SUCCESS;
}
1490
/**
 * vkAllocateCommandBuffers: hand out command buffers, preferring to
 * recycle ones parked on the pool's free list over fresh allocation.
 * On any failure, all buffers created by this call are destroyed and the
 * output array is zeroed, as the spec requires.
 */
VkResult
tu_AllocateCommandBuffers(VkDevice _device,
                          const VkCommandBufferAllocateInfo *pAllocateInfo,
                          VkCommandBuffer *pCommandBuffers)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {

      if (!list_empty(&pool->free_cmd_buffers)) {
         /* recycle: move from the free list back to the active list */
         struct tu_cmd_buffer *cmd_buffer = list_first_entry(
            &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link);

         list_del(&cmd_buffer->pool_link);
         list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);

         result = tu_reset_cmd_buffer(cmd_buffer);
         cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
         cmd_buffer->level = pAllocateInfo->level;

         pCommandBuffers[i] = tu_cmd_buffer_to_handle(cmd_buffer);
      } else {
         result = tu_create_cmd_buffer(device, pool, pAllocateInfo->level,
                                       &pCommandBuffers[i]);
      }
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      /* free only the i buffers successfully produced before the failure */
      tu_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i,
                            pCommandBuffers);

      /* From the Vulkan 1.0.66 spec:
       *
       * "vkAllocateCommandBuffers can be used to create multiple
       *  command buffers. If the creation of any of those command
       *  buffers fails, the implementation must destroy all
       *  successfully created command buffer objects from this
       *  command, set all entries of the pCommandBuffers array to
       *  NULL and return the error."
       */
      memset(pCommandBuffers, 0,
             sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
   }

   return result;
}
1543
1544void
1545tu_FreeCommandBuffers(VkDevice device,
1546                      VkCommandPool commandPool,
1547                      uint32_t commandBufferCount,
1548                      const VkCommandBuffer *pCommandBuffers)
1549{
1550   for (uint32_t i = 0; i < commandBufferCount; i++) {
1551      TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
1552
1553      if (cmd_buffer) {
1554         if (cmd_buffer->pool) {
1555            list_del(&cmd_buffer->pool_link);
1556            list_addtail(&cmd_buffer->pool_link,
1557                         &cmd_buffer->pool->free_cmd_buffers);
1558         } else
1559            tu_cmd_buffer_destroy(cmd_buffer);
1560      }
1561   }
1562}
1563
1564VkResult
1565tu_ResetCommandBuffer(VkCommandBuffer commandBuffer,
1566                      VkCommandBufferResetFlags flags)
1567{
1568   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
1569   return tu_reset_cmd_buffer(cmd_buffer);
1570}
1571
/**
 * vkBeginCommandBuffer: implicitly reset the buffer if needed, clear the
 * recorded state, and — for primary buffers on the GENERAL queue — emit
 * the one-time hardware init sequence (tu6_init_hw).
 */
VkResult
tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                      const VkCommandBufferBeginInfo *pBeginInfo)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   VkResult result = VK_SUCCESS;

   if (cmd_buffer->status != TU_CMD_BUFFER_STATUS_INITIAL) {
      /* If the command buffer has already been reset with
       * vkResetCommandBuffer, no need to do it again.
       */
      result = tu_reset_cmd_buffer(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }

   memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
   cmd_buffer->usage_flags = pBeginInfo->flags;

   tu_cs_begin(&cmd_buffer->cs);

   cmd_buffer->marker_seqno = 0;
   cmd_buffer->scratch_seqno = 0;

   /* setup initial configuration into command buffer */
   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      switch (cmd_buffer->queue_family_index) {
      case TU_QUEUE_GENERAL:
         tu6_init_hw(cmd_buffer, &cmd_buffer->cs);
         break;
      default:
         break;
      }
   }

   cmd_buffer->status = TU_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}
1611
1612void
1613tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
1614                        uint32_t firstBinding,
1615                        uint32_t bindingCount,
1616                        const VkBuffer *pBuffers,
1617                        const VkDeviceSize *pOffsets)
1618{
1619   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1620
1621   assert(firstBinding + bindingCount <= MAX_VBS);
1622
1623   for (uint32_t i = 0; i < bindingCount; i++) {
1624      cmd->state.vb.buffers[firstBinding + i] =
1625         tu_buffer_from_handle(pBuffers[i]);
1626      cmd->state.vb.offsets[firstBinding + i] = pOffsets[i];
1627   }
1628
1629   /* VB states depend on VkPipelineVertexInputStateCreateInfo */
1630   cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
1631}
1632
1633void
1634tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
1635                      VkBuffer buffer,
1636                      VkDeviceSize offset,
1637                      VkIndexType indexType)
1638{
1639   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1640   TU_FROM_HANDLE(tu_buffer, buf, buffer);
1641
1642   /* initialize/update the restart index */
1643   if (!cmd->state.index_buffer || cmd->state.index_type != indexType) {
1644      struct tu_cs *draw_cs = &cmd->draw_cs;
1645      VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2);
1646      if (result != VK_SUCCESS) {
1647         cmd->record_result = result;
1648         return;
1649      }
1650
1651      tu6_emit_restart_index(
1652         draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 0xffffffff : 0xffff);
1653
1654      tu_cs_sanity_check(draw_cs);
1655   }
1656
1657   /* track the BO */
1658   if (cmd->state.index_buffer != buf)
1659      tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
1660
1661   cmd->state.index_buffer = buf;
1662   cmd->state.index_offset = offset;
1663   cmd->state.index_type = indexType;
1664}
1665
/* vkCmdBindDescriptorSets: stub — descriptor set binding is not
 * implemented yet.
 */
void
tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                         VkPipelineBindPoint pipelineBindPoint,
                         VkPipelineLayout _layout,
                         uint32_t firstSet,
                         uint32_t descriptorSetCount,
                         const VkDescriptorSet *pDescriptorSets,
                         uint32_t dynamicOffsetCount,
                         const uint32_t *pDynamicOffsets)
{
}
1677
/* vkCmdPushConstants: stub — push constants are not implemented yet. */
void
tu_CmdPushConstants(VkCommandBuffer commandBuffer,
                    VkPipelineLayout layout,
                    VkShaderStageFlags stageFlags,
                    uint32_t offset,
                    uint32_t size,
                    const void *pValues)
{
}
1687
/**
 * vkEndCommandBuffer: register the scratch BO and all draw/tile command
 * stream BOs for submit-time residency, close the primary stream, and
 * return any error recorded earlier during recording.
 */
VkResult
tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);

   /* only needed if a marker was actually written during recording */
   if (cmd_buffer->scratch_seqno) {
      tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo,
                     MSM_SUBMIT_BO_WRITE);
   }

   for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) {
      tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i],
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
   }

   for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) {
      tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i],
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
   }

   tu_cs_end(&cmd_buffer->cs);

   /* the attachment array must have been released by render-pass end */
   assert(!cmd_buffer->state.attachments);

   cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE;

   return cmd_buffer->record_result;
}
1716
1717void
1718tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
1719                   VkPipelineBindPoint pipelineBindPoint,
1720                   VkPipeline _pipeline)
1721{
1722   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1723   TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);
1724
1725   switch (pipelineBindPoint) {
1726   case VK_PIPELINE_BIND_POINT_GRAPHICS:
1727      cmd->state.pipeline = pipeline;
1728      cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE;
1729      break;
1730   case VK_PIPELINE_BIND_POINT_COMPUTE:
1731      tu_finishme("binding compute pipeline");
1732      break;
1733   default:
1734      unreachable("unrecognized pipeline bind point");
1735      break;
1736   }
1737}
1738
1739void
1740tu_CmdSetViewport(VkCommandBuffer commandBuffer,
1741                  uint32_t firstViewport,
1742                  uint32_t viewportCount,
1743                  const VkViewport *pViewports)
1744{
1745   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1746   struct tu_cs *draw_cs = &cmd->draw_cs;
1747
1748   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12);
1749   if (result != VK_SUCCESS) {
1750      cmd->record_result = result;
1751      return;
1752   }
1753
1754   assert(firstViewport == 0 && viewportCount == 1);
1755   tu6_emit_viewport(draw_cs, pViewports);
1756
1757   tu_cs_sanity_check(draw_cs);
1758}
1759
1760void
1761tu_CmdSetScissor(VkCommandBuffer commandBuffer,
1762                 uint32_t firstScissor,
1763                 uint32_t scissorCount,
1764                 const VkRect2D *pScissors)
1765{
1766   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1767   struct tu_cs *draw_cs = &cmd->draw_cs;
1768
1769   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3);
1770   if (result != VK_SUCCESS) {
1771      cmd->record_result = result;
1772      return;
1773   }
1774
1775   assert(firstScissor == 0 && scissorCount == 1);
1776   tu6_emit_scissor(draw_cs, pScissors);
1777
1778   tu_cs_sanity_check(draw_cs);
1779}
1780
1781void
1782tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
1783{
1784   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1785
1786   cmd->state.dynamic.line_width = lineWidth;
1787
1788   /* line width depends on VkPipelineRasterizationStateCreateInfo */
1789   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
1790}
1791
1792void
1793tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
1794                   float depthBiasConstantFactor,
1795                   float depthBiasClamp,
1796                   float depthBiasSlopeFactor)
1797{
1798   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1799   struct tu_cs *draw_cs = &cmd->draw_cs;
1800
1801   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4);
1802   if (result != VK_SUCCESS) {
1803      cmd->record_result = result;
1804      return;
1805   }
1806
1807   tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp,
1808                       depthBiasSlopeFactor);
1809
1810   tu_cs_sanity_check(draw_cs);
1811}
1812
1813void
1814tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
1815                        const float blendConstants[4])
1816{
1817   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1818   struct tu_cs *draw_cs = &cmd->draw_cs;
1819
1820   VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5);
1821   if (result != VK_SUCCESS) {
1822      cmd->record_result = result;
1823      return;
1824   }
1825
1826   tu6_emit_blend_constants(draw_cs, blendConstants);
1827
1828   tu_cs_sanity_check(draw_cs);
1829}
1830
/* Set the dynamic depth-bounds range.
 *
 * TODO: unimplemented stub — the bounds are silently ignored.
 */
void
tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
                     float minDepthBounds,
                     float maxDepthBounds)
{
}
1837
1838void
1839tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
1840                            VkStencilFaceFlags faceMask,
1841                            uint32_t compareMask)
1842{
1843   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1844
1845   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
1846      cmd->state.dynamic.stencil_compare_mask.front = compareMask;
1847   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
1848      cmd->state.dynamic.stencil_compare_mask.back = compareMask;
1849
1850   /* the front/back compare masks must be updated together */
1851   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
1852}
1853
1854void
1855tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
1856                          VkStencilFaceFlags faceMask,
1857                          uint32_t writeMask)
1858{
1859   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1860
1861   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
1862      cmd->state.dynamic.stencil_write_mask.front = writeMask;
1863   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
1864      cmd->state.dynamic.stencil_write_mask.back = writeMask;
1865
1866   /* the front/back write masks must be updated together */
1867   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
1868}
1869
1870void
1871tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
1872                          VkStencilFaceFlags faceMask,
1873                          uint32_t reference)
1874{
1875   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1876
1877   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
1878      cmd->state.dynamic.stencil_reference.front = reference;
1879   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
1880      cmd->state.dynamic.stencil_reference.back = reference;
1881
1882   /* the front/back references must be updated together */
1883   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
1884}
1885
/* Execute secondary command buffers from a primary one.
 *
 * TODO: unimplemented stub — the secondary buffers are silently ignored.
 */
void
tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                      uint32_t commandBufferCount,
                      const VkCommandBuffer *pCmdBuffers)
{
}
1892
1893VkResult
1894tu_CreateCommandPool(VkDevice _device,
1895                     const VkCommandPoolCreateInfo *pCreateInfo,
1896                     const VkAllocationCallbacks *pAllocator,
1897                     VkCommandPool *pCmdPool)
1898{
1899   TU_FROM_HANDLE(tu_device, device, _device);
1900   struct tu_cmd_pool *pool;
1901
1902   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
1903                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1904   if (pool == NULL)
1905      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1906
1907   if (pAllocator)
1908      pool->alloc = *pAllocator;
1909   else
1910      pool->alloc = device->alloc;
1911
1912   list_inithead(&pool->cmd_buffers);
1913   list_inithead(&pool->free_cmd_buffers);
1914
1915   pool->queue_family_index = pCreateInfo->queueFamilyIndex;
1916
1917   *pCmdPool = tu_cmd_pool_to_handle(pool);
1918
1919   return VK_SUCCESS;
1920}
1921
1922void
1923tu_DestroyCommandPool(VkDevice _device,
1924                      VkCommandPool commandPool,
1925                      const VkAllocationCallbacks *pAllocator)
1926{
1927   TU_FROM_HANDLE(tu_device, device, _device);
1928   TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
1929
1930   if (!pool)
1931      return;
1932
1933   list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
1934                            &pool->cmd_buffers, pool_link)
1935   {
1936      tu_cmd_buffer_destroy(cmd_buffer);
1937   }
1938
1939   list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
1940                            &pool->free_cmd_buffers, pool_link)
1941   {
1942      tu_cmd_buffer_destroy(cmd_buffer);
1943   }
1944
1945   vk_free2(&device->alloc, pAllocator, pool);
1946}
1947
1948VkResult
1949tu_ResetCommandPool(VkDevice device,
1950                    VkCommandPool commandPool,
1951                    VkCommandPoolResetFlags flags)
1952{
1953   TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
1954   VkResult result;
1955
1956   list_for_each_entry(struct tu_cmd_buffer, cmd_buffer, &pool->cmd_buffers,
1957                       pool_link)
1958   {
1959      result = tu_reset_cmd_buffer(cmd_buffer);
1960      if (result != VK_SUCCESS)
1961         return result;
1962   }
1963
1964   return VK_SUCCESS;
1965}
1966
1967void
1968tu_TrimCommandPool(VkDevice device,
1969                   VkCommandPool commandPool,
1970                   VkCommandPoolTrimFlags flags)
1971{
1972   TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
1973
1974   if (!pool)
1975      return;
1976
1977   list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
1978                            &pool->free_cmd_buffers, pool_link)
1979   {
1980      tu_cmd_buffer_destroy(cmd_buffer);
1981   }
1982}
1983
/* Begin a render pass: latch the pass/framebuffer state, set up per-pass
 * attachment tracking, compute the tiling configuration for the render
 * area, and pre-build the tile load/store IBs.  Draw commands recorded
 * afterwards go into draw_cs until vkCmdEndRenderPass.
 */
void
tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
                      const VkRenderPassBeginInfo *pRenderPassBegin,
                      VkSubpassContents contents)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass);
   TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
   VkResult result;

   /* The pass always starts at its first subpass. */
   cmd_buffer->state.pass = pass;
   cmd_buffer->state.subpass = pass->subpasses;
   cmd_buffer->state.framebuffer = framebuffer;

   result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);
   if (result != VK_SUCCESS)
      return;
   /* NOTE(review): the failure above is not stored in record_result here —
    * presumably tu_cmd_state_setup_attachments records it itself; confirm.
    */

   tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea);
   tu_cmd_prepare_tile_load_ib(cmd_buffer);
   tu_cmd_prepare_tile_store_ib(cmd_buffer);

   /* draw_cs should contain entries only for this render pass */
   assert(!cmd_buffer->draw_cs.entry_count);
   tu_cs_begin(&cmd_buffer->draw_cs);
}
2010
2011void
2012tu_CmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
2013                          const VkRenderPassBeginInfo *pRenderPassBeginInfo,
2014                          const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
2015{
2016   tu_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo,
2017                         pSubpassBeginInfo->contents);
2018}
2019
/* Advance to the next subpass: flush the tiles recorded for the current
 * subpass, then rebuild the tiling configuration and tile load/store IBs
 * for the new one.
 */
void
tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);

   /* Render the previous subpass's work before switching state. */
   tu_cmd_render_tiles(cmd);

   cmd->state.subpass++;

   /* NULL render area: keep the area set at vkCmdBeginRenderPass time. */
   tu_cmd_update_tiling_config(cmd, NULL);
   tu_cmd_prepare_tile_load_ib(cmd);
   tu_cmd_prepare_tile_store_ib(cmd);
}
2033
2034void
2035tu_CmdNextSubpass2KHR(VkCommandBuffer commandBuffer,
2036                      const VkSubpassBeginInfoKHR *pSubpassBeginInfo,
2037                      const VkSubpassEndInfoKHR *pSubpassEndInfo)
2038{
2039   tu_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents);
2040}
2041
/* Unified description of a draw, filled in by the vkCmdDraw* entry points
 * and consumed by tu_draw().  For direct draws, count/offsets are used;
 * for indirect draws, the indirect buffer fields are used instead.
 */
struct tu_draw_info
{
   /**
    * Number of vertices.
    */
   uint32_t count;

   /**
    * Index of the first vertex.
    */
   int32_t vertex_offset;

   /**
    * First instance id.
    */
   uint32_t first_instance;

   /**
    * Number of instances.
    */
   uint32_t instance_count;

   /**
    * First index (indexed draws only).
    */
   uint32_t first_index;

   /**
    * Whether it's an indexed draw.
    */
   bool indexed;

   /**
    * Indirect draw parameters resource (NULL for direct draws).
    */
   struct tu_buffer *indirect;
   uint64_t indirect_offset;
   uint32_t stride;

   /**
    * Draw count parameters resource (draw-indirect-count only).
    */
   struct tu_buffer *count_buffer;
   uint64_t count_buffer_offset;
};
2087
/* Group IDs used with CP_SET_DRAW_STATE.  Each ID identifies one slot of
 * pipeline state so a later packet with the same ID replaces the previous
 * state group rather than accumulating.
 */
enum tu_draw_state_group_id
{
   TU_DRAW_STATE_PROGRAM,
   TU_DRAW_STATE_PROGRAM_BINNING,
   TU_DRAW_STATE_VI,
   TU_DRAW_STATE_VI_BINNING,
   TU_DRAW_STATE_VP,
   TU_DRAW_STATE_RAST,
   TU_DRAW_STATE_DS,
   TU_DRAW_STATE_BLEND,

   TU_DRAW_STATE_COUNT,
};
2101
/* One entry of a CP_SET_DRAW_STATE packet: the group slot, the enable mask
 * selecting which passes execute it, and the IB holding the state commands.
 */
struct tu_draw_state_group
{
   enum tu_draw_state_group_id id;
   uint32_t enable_mask;
   const struct tu_cs_entry *ib;
};
2108
/* Emit everything a draw depends on: primitive-restart control, any dirty
 * dynamic state, vertex-buffer fetch registers, and the pipeline's
 * CP_SET_DRAW_STATE groups.  Also registers the BOs the draw will read
 * with the submit BO list, and clears cmd->state.dirty on the way out.
 */
static void
tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     const struct tu_draw_info *draw)
{
   const struct tu_pipeline *pipeline = cmd->state.pipeline;
   const struct tu_dynamic_state *dynamic = &cmd->state.dynamic;
   struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT];
   uint32_t draw_state_group_count = 0;

   VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   /* TODO lrz */

   /* Primitive restart only applies to indexed draws. */
   uint32_t pc_primitive_cntl = 0;
   if (pipeline->ia.primitive_restart && draw->indexed)
      pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART;

   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0);
   tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1);
   tu_cs_emit(cs, pc_primitive_cntl);

   /* Dynamic state is re-emitted only when it is both flagged dirty and
    * actually dynamic in the bound pipeline (otherwise the pipeline's
    * baked value stands).  Line width also depends on the pipeline's
    * rasterization state, hence the extra PIPELINE dirty trigger.
    */
   if (cmd->state.dirty &
          (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) {
      tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl,
                            dynamic->line_width);
   }

   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
      tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front,
                                    dynamic->stencil_compare_mask.back);
   }

   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
      tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front,
                                  dynamic->stencil_write_mask.back);
   }

   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
      tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front,
                                 dynamic->stencil_reference.back);
   }

   /* Program the VFD_FETCH registers from the bound vertex buffers.  The
    * fetch size is clamped to zero when the combined offset runs past the
    * end of the BO.
    */
   if (cmd->state.dirty &
       (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) {
      for (uint32_t i = 0; i < pipeline->vi.count; i++) {
         const uint32_t binding = pipeline->vi.bindings[i];
         const uint32_t stride = pipeline->vi.strides[i];
         const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
         const VkDeviceSize offset = buf->bo_offset +
                                     cmd->state.vb.offsets[binding] +
                                     pipeline->vi.offsets[i];
         const VkDeviceSize size =
            offset < buf->bo->size ? buf->bo->size - offset : 0;

         tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4);
         tu_cs_emit_qw(cs, buf->bo->iova + offset);
         tu_cs_emit(cs, size);
         tu_cs_emit(cs, stride);
      }
   }

   /* TODO shader consts */

   /* Collect the pipeline's state groups.  NOTE(review): the enable_mask
    * values (0x1/0x6/0x7) presumably select binning vs. rendering passes —
    * confirm against the CP_SET_DRAW_STATE documentation.
    */
   if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_PROGRAM,
            .enable_mask = 0x6,
            .ib = &pipeline->program.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_PROGRAM_BINNING,
            .enable_mask = 0x1,
            .ib = &pipeline->program.binning_state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_VI,
            .enable_mask = 0x6,
            .ib = &pipeline->vi.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_VI_BINNING,
            .enable_mask = 0x1,
            .ib = &pipeline->vi.binning_state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_VP,
            .enable_mask = 0x7,
            .ib = &pipeline->vp.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_RAST,
            .enable_mask = 0x7,
            .ib = &pipeline->rast.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_DS,
            .enable_mask = 0x7,
            .ib = &pipeline->ds.state_ib,
         };
      draw_state_groups[draw_state_group_count++] =
         (struct tu_draw_state_group) {
            .id = TU_DRAW_STATE_BLEND,
            .enable_mask = 0x7,
            .ib = &pipeline->blend.state_ib,
         };
   }

   /* Empty groups are emitted with the DISABLE bit so the ID's previous
    * state is cleared instead of pointing at a stale IB.
    * NOTE(review): when no group is dirty this emits a zero-payload
    * CP_SET_DRAW_STATE packet — confirm that is benign.
    */
   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count);
   for (uint32_t i = 0; i < draw_state_group_count; i++) {
      const struct tu_draw_state_group *group = &draw_state_groups[i];

      uint32_t cp_set_draw_state =
         CP_SET_DRAW_STATE__0_COUNT(group->ib->size / 4) |
         CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) |
         CP_SET_DRAW_STATE__0_GROUP_ID(group->id);
      uint64_t iova;
      if (group->ib->size) {
         iova = group->ib->bo->iova + group->ib->offset;
      } else {
         cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE;
         iova = 0;
      }

      tu_cs_emit(cs, cp_set_draw_state);
      tu_cs_emit_qw(cs, iova);
   }

   tu_cs_sanity_check(cs);

   /* track BOs */
   if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
      tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo,
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
      for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) {
         tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i],
                        MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
      }
   }
   if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) {
      for (uint32_t i = 0; i < MAX_VBS; i++) {
         const struct tu_buffer *buf = cmd->state.vb.buffers[i];
         if (buf)
            tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
      }
   }

   /* Everything above has been emitted; nothing is dirty anymore. */
   cmd->state.dirty = 0;
}
2276
/* Emit the CP_DRAW_INDX_OFFSET packet for a direct (non-indirect) draw.
 * Indexed draws source indices via DMA from the bound index buffer;
 * non-indexed draws use auto-generated indices.
 */
static void
tu6_emit_draw_direct(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     const struct tu_draw_info *draw)
{

   const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype;

   /* Base vertex and base instance applied by the vertex fetcher. */
   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2);
   tu_cs_emit(cs, draw->vertex_offset);
   tu_cs_emit(cs, draw->first_instance);

   /* TODO hw binning */
   if (draw->indexed) {
      const enum a4xx_index_size index_size =
         tu6_index_size(cmd->state.index_type);
      const uint32_t index_bytes =
         (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 4 : 2;
      const struct tu_buffer *buf = cmd->state.index_buffer;
      /* Fold firstIndex into the DMA start address. */
      const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset +
                                  index_bytes * draw->first_index;
      const uint32_t size = index_bytes * draw->count;

      /* NOTE(review): the 0x2000 bit is undocumented here — presumably
       * copied from the freedreno gallium driver; confirm its meaning.
       */
      const uint32_t cp_draw_indx =
         CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
         CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
         CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
         CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000;

      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
      tu_cs_emit(cs, cp_draw_indx);
      tu_cs_emit(cs, draw->instance_count);
      tu_cs_emit(cs, draw->count);
      tu_cs_emit(cs, 0x0); /* XXX */
      tu_cs_emit_qw(cs, buf->bo->iova + offset);
      tu_cs_emit(cs, size);
   } else {
      const uint32_t cp_draw_indx =
         CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
         CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
         CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000;

      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
      tu_cs_emit(cs, cp_draw_indx);
      tu_cs_emit(cs, draw->instance_count);
      tu_cs_emit(cs, draw->count);
   }
}
2325
2326static void
2327tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw)
2328{
2329   struct tu_cs *cs = &cmd->draw_cs;
2330
2331   tu6_bind_draw_states(cmd, cs, draw);
2332
2333   VkResult result = tu_cs_reserve_space(cmd->device, cs, 32);
2334   if (result != VK_SUCCESS) {
2335      cmd->record_result = result;
2336      return;
2337   }
2338
2339   if (draw->indirect) {
2340      tu_finishme("indirect draw");
2341      return;
2342   }
2343
2344   /* TODO tu6_emit_marker should pick different regs depending on cs */
2345   tu6_emit_marker(cmd, cs);
2346   tu6_emit_draw_direct(cmd, cs, draw);
2347   tu6_emit_marker(cmd, cs);
2348
2349   cmd->wait_for_idle = true;
2350
2351   tu_cs_sanity_check(cs);
2352}
2353
2354void
2355tu_CmdDraw(VkCommandBuffer commandBuffer,
2356           uint32_t vertexCount,
2357           uint32_t instanceCount,
2358           uint32_t firstVertex,
2359           uint32_t firstInstance)
2360{
2361   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2362   struct tu_draw_info info = {};
2363
2364   info.count = vertexCount;
2365   info.instance_count = instanceCount;
2366   info.first_instance = firstInstance;
2367   info.vertex_offset = firstVertex;
2368
2369   tu_draw(cmd_buffer, &info);
2370}
2371
2372void
2373tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
2374                  uint32_t indexCount,
2375                  uint32_t instanceCount,
2376                  uint32_t firstIndex,
2377                  int32_t vertexOffset,
2378                  uint32_t firstInstance)
2379{
2380   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2381   struct tu_draw_info info = {};
2382
2383   info.indexed = true;
2384   info.count = indexCount;
2385   info.instance_count = instanceCount;
2386   info.first_index = firstIndex;
2387   info.vertex_offset = vertexOffset;
2388   info.first_instance = firstInstance;
2389
2390   tu_draw(cmd_buffer, &info);
2391}
2392
2393void
2394tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
2395                   VkBuffer _buffer,
2396                   VkDeviceSize offset,
2397                   uint32_t drawCount,
2398                   uint32_t stride)
2399{
2400   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2401   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
2402   struct tu_draw_info info = {};
2403
2404   info.count = drawCount;
2405   info.indirect = buffer;
2406   info.indirect_offset = offset;
2407   info.stride = stride;
2408
2409   tu_draw(cmd_buffer, &info);
2410}
2411
2412void
2413tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
2414                          VkBuffer _buffer,
2415                          VkDeviceSize offset,
2416                          uint32_t drawCount,
2417                          uint32_t stride)
2418{
2419   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2420   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
2421   struct tu_draw_info info = {};
2422
2423   info.indexed = true;
2424   info.count = drawCount;
2425   info.indirect = buffer;
2426   info.indirect_offset = offset;
2427   info.stride = stride;
2428
2429   tu_draw(cmd_buffer, &info);
2430}
2431
/* Unified description of a compute dispatch, filled in by the
 * vkCmdDispatch* entry points and consumed by tu_dispatch().
 */
struct tu_dispatch_info
{
   /**
    * Determine the layout of the grid (in block units) to be used.
    */
   uint32_t blocks[3];

   /**
    * A starting offset for the grid. If unaligned is set, the offset
    * must still be aligned.
    */
   uint32_t offsets[3];
   /**
    * Whether it's an unaligned compute dispatch.
    */
   bool unaligned;

   /**
    * Indirect compute parameters resource (NULL for direct dispatches).
    */
   struct tu_buffer *indirect;
   uint64_t indirect_offset;
};
2455
/* Common compute dispatch path.
 *
 * TODO: unimplemented stub — dispatches are silently dropped.
 */
static void
tu_dispatch(struct tu_cmd_buffer *cmd_buffer,
            const struct tu_dispatch_info *info)
{
}
2461
2462void
2463tu_CmdDispatchBase(VkCommandBuffer commandBuffer,
2464                   uint32_t base_x,
2465                   uint32_t base_y,
2466                   uint32_t base_z,
2467                   uint32_t x,
2468                   uint32_t y,
2469                   uint32_t z)
2470{
2471   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2472   struct tu_dispatch_info info = {};
2473
2474   info.blocks[0] = x;
2475   info.blocks[1] = y;
2476   info.blocks[2] = z;
2477
2478   info.offsets[0] = base_x;
2479   info.offsets[1] = base_y;
2480   info.offsets[2] = base_z;
2481   tu_dispatch(cmd_buffer, &info);
2482}
2483
/* Plain compute dispatch: a dispatch-base with a zero grid origin. */
void
tu_CmdDispatch(VkCommandBuffer commandBuffer,
               uint32_t x,
               uint32_t y,
               uint32_t z)
{
   tu_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
}
2492
2493void
2494tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
2495                       VkBuffer _buffer,
2496                       VkDeviceSize offset)
2497{
2498   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2499   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
2500   struct tu_dispatch_info info = {};
2501
2502   info.indirect = buffer;
2503   info.indirect_offset = offset;
2504
2505   tu_dispatch(cmd_buffer, &info);
2506}
2507
/* End the render pass: close the draw command stream, render the recorded
 * work tile by tile, then drop the per-pass state so the command buffer is
 * ready for the next pass.
 */
void
tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);

   tu_cs_end(&cmd_buffer->draw_cs);

   tu_cmd_render_tiles(cmd_buffer);

   /* discard draw_cs entries now that the tiles are rendered */
   tu_cs_discard_entries(&cmd_buffer->draw_cs);

   /* Attachment state was allocated at vkCmdBeginRenderPass time. */
   vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
   cmd_buffer->state.attachments = NULL;

   cmd_buffer->state.pass = NULL;
   cmd_buffer->state.subpass = NULL;
   cmd_buffer->state.framebuffer = NULL;
}
2527
/* KHR2 variant: the end info carries nothing this driver uses, so simply
 * forward to the original entry point.
 */
void
tu_CmdEndRenderPass2KHR(VkCommandBuffer commandBuffer,
                        const VkSubpassEndInfoKHR *pSubpassEndInfo)
{
   tu_CmdEndRenderPass(commandBuffer);
}
2534
/* Source-scope description shared by pipeline barriers (stage mask) and
 * event waits (event list); consumed by tu_barrier().
 */
struct tu_barrier_info
{
   uint32_t eventCount;
   const VkEvent *pEvents;
   VkPipelineStageFlags srcStageMask;
};
2541
/* Common implementation behind vkCmdPipelineBarrier and vkCmdWaitEvents.
 *
 * TODO: unimplemented stub — no cache flushes or waits are emitted.
 */
static void
tu_barrier(struct tu_cmd_buffer *cmd_buffer,
           uint32_t memoryBarrierCount,
           const VkMemoryBarrier *pMemoryBarriers,
           uint32_t bufferMemoryBarrierCount,
           const VkBufferMemoryBarrier *pBufferMemoryBarriers,
           uint32_t imageMemoryBarrierCount,
           const VkImageMemoryBarrier *pImageMemoryBarriers,
           const struct tu_barrier_info *info)
{
}
2553
2554void
2555tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
2556                      VkPipelineStageFlags srcStageMask,
2557                      VkPipelineStageFlags destStageMask,
2558                      VkBool32 byRegion,
2559                      uint32_t memoryBarrierCount,
2560                      const VkMemoryBarrier *pMemoryBarriers,
2561                      uint32_t bufferMemoryBarrierCount,
2562                      const VkBufferMemoryBarrier *pBufferMemoryBarriers,
2563                      uint32_t imageMemoryBarrierCount,
2564                      const VkImageMemoryBarrier *pImageMemoryBarriers)
2565{
2566   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2567   struct tu_barrier_info info;
2568
2569   info.eventCount = 0;
2570   info.pEvents = NULL;
2571   info.srcStageMask = srcStageMask;
2572
2573   tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
2574              bufferMemoryBarrierCount, pBufferMemoryBarriers,
2575              imageMemoryBarrierCount, pImageMemoryBarriers, &info);
2576}
2577
/* Write `value` to the event object once `stageMask` work completes;
 * backs both vkCmdSetEvent (1) and vkCmdResetEvent (0).
 *
 * TODO: unimplemented stub — the event value is never written.
 */
static void
write_event(struct tu_cmd_buffer *cmd_buffer,
            struct tu_event *event,
            VkPipelineStageFlags stageMask,
            unsigned value)
{
}
2585
/* Signal an event (write 1) after the given stages complete. */
void
tu_CmdSetEvent(VkCommandBuffer commandBuffer,
               VkEvent _event,
               VkPipelineStageFlags stageMask)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_event, event, _event);

   write_event(cmd_buffer, event, stageMask, 1);
}
2596
/* Unsignal an event (write 0) after the given stages complete. */
void
tu_CmdResetEvent(VkCommandBuffer commandBuffer,
                 VkEvent _event,
                 VkPipelineStageFlags stageMask)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
   TU_FROM_HANDLE(tu_event, event, _event);

   write_event(cmd_buffer, event, stageMask, 0);
}
2607
2608void
2609tu_CmdWaitEvents(VkCommandBuffer commandBuffer,
2610                 uint32_t eventCount,
2611                 const VkEvent *pEvents,
2612                 VkPipelineStageFlags srcStageMask,
2613                 VkPipelineStageFlags dstStageMask,
2614                 uint32_t memoryBarrierCount,
2615                 const VkMemoryBarrier *pMemoryBarriers,
2616                 uint32_t bufferMemoryBarrierCount,
2617                 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
2618                 uint32_t imageMemoryBarrierCount,
2619                 const VkImageMemoryBarrier *pImageMemoryBarriers)
2620{
2621   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2622   struct tu_barrier_info info;
2623
2624   info.eventCount = eventCount;
2625   info.pEvents = pEvents;
2626   info.srcStageMask = 0;
2627
2628   tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
2629              bufferMemoryBarrierCount, pBufferMemoryBarriers,
2630              imageMemoryBarrierCount, pImageMemoryBarriers, &info);
2631}
2632
/* Select devices in a device group.  This driver exposes only single-GPU
 * device groups, so there is nothing to do.
 */
void
tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
{
   /* No-op */
}
2638