/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_device.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
287ec681f3Smrg
297ec681f3Smrg#include "genxml/gen_macros.h"
307ec681f3Smrg
317ec681f3Smrg#include "decode.h"
327ec681f3Smrg
337ec681f3Smrg#include "panvk_private.h"
347ec681f3Smrg#include "panvk_cs.h"
357ec681f3Smrg
/* Submit one batch to the kernel.
 *
 * A batch may carry up to two kernel submissions: the vertex/tiler job
 * chain (scoreboard.first_job) and the fragment job (fragment_job).  When
 * both are present, the fragment submit waits on queue->sync, which the
 * first submit signals, so the two are serialized on the GPU.
 *
 * bos/nr_bos: BO handles referenced by the batch (shared by both submits).
 * in_fences/nr_in_fences: syncobj handles the first submit must wait on.
 *
 * NOTE(review): kernel errors are only caught by assert(), so they are
 * silently ignored in release builds.
 */
static void
panvk_queue_submit_batch(struct panvk_queue *queue,
                         struct panvk_batch *batch,
                         uint32_t *bos, unsigned nr_bos,
                         uint32_t *in_fences,
                         unsigned nr_in_fences)
{
   const struct panvk_device *dev = queue->device;
   unsigned debug = dev->physical_device->instance->debug_flags;
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   int ret;

   /* Reset the batch if it's already been issued */
   if (batch->issued) {
      /* Zero the first four 32-bit words of every job descriptor —
       * presumably the job header the GPU updates on execution, so it must
       * be cleared before the chain can be walked again.  TODO confirm
       * against the job descriptor layout. */
      util_dynarray_foreach(&batch->jobs, void *, job)
         memset((*job), 0, 4 * 4);

      /* Reset the tiler before re-issuing the batch */
#if PAN_ARCH >= 6
      /* Bifrost+: tiler context + heap live in their own descriptor buffer;
       * restore both from the pristine template. */
      if (batch->tiler.descs.cpu) {
         memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
                pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
      }
#else
      /* Midgard: the tiler context is embedded in the framebuffer
       * descriptor; restore it and re-pack the weights section. */
      if (batch->fb.desc.cpu) {
         void *tiler = pan_section_ptr(batch->fb.desc.cpu, FRAMEBUFFER, TILER);
         memcpy(tiler, batch->tiler.templ, pan_size(TILER_CONTEXT));
         /* All weights set to 0, nothing to do here */
         pan_section_pack(batch->fb.desc.cpu, FRAMEBUFFER, TILER_WEIGHTS, w);
      }
#endif
   }

   /* First submission: the vertex/tiler job chain.  Waits on the caller's
    * in-fences and signals queue->sync. */
   if (batch->scoreboard.first_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .in_syncs = (uintptr_t)in_fences,
         .in_sync_count = nr_in_fences,
         .out_sync = queue->sync,
         .jc = batch->scoreboard.first_job,
      };

      ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);

      /* TRACE/SYNC both need the job finished before going further: TRACE
       * decodes descriptors the GPU may still be writing, SYNC forces
       * synchronous execution. */
      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE)
         GENX(pandecode_jc)(batch->scoreboard.first_job, pdev->gpu_id);
   }

   /* Second submission: the fragment job. */
   if (batch->fragment_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .out_sync = queue->sync,
         .jc = batch->fragment_job,
         .requirements = PANFROST_JD_REQ_FS,
      };

      if (batch->scoreboard.first_job) {
         /* Chain after the vertex/tiler submit through queue->sync.  Note
          * queue->sync is also the out_sync: the wait snapshots the current
          * payload before the new signal replaces it. */
         submit.in_syncs = (uintptr_t)(&queue->sync);
         submit.in_sync_count = 1;
      } else {
         /* No vertex/tiler chain: wait on the caller's in-fences directly. */
         submit.in_syncs = (uintptr_t)in_fences;
         submit.in_sync_count = nr_in_fences;
      }

      ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);
      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE)
         GENX(pandecode_jc)(batch->fragment_job, pdev->gpu_id);
   }

   if (debug & PANVK_DEBUG_TRACE)
      pandecode_next_frame();

   /* Remember that this batch ran so a re-submit resets it first. */
   batch->issued = true;
}
1247ec681f3Smrg
1257ec681f3Smrgstatic void
1267ec681f3Smrgpanvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
1277ec681f3Smrg{
1287ec681f3Smrg   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
1297ec681f3Smrg   int ret;
1307ec681f3Smrg
1317ec681f3Smrg   struct drm_syncobj_handle handle = {
1327ec681f3Smrg      .handle = queue->sync,
1337ec681f3Smrg      .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
1347ec681f3Smrg      .fd = -1,
1357ec681f3Smrg   };
1367ec681f3Smrg
1377ec681f3Smrg   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
1387ec681f3Smrg   assert(!ret);
1397ec681f3Smrg   assert(handle.fd >= 0);
1407ec681f3Smrg
1417ec681f3Smrg   handle.handle = syncobj;
1427ec681f3Smrg   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
1437ec681f3Smrg   assert(!ret);
1447ec681f3Smrg
1457ec681f3Smrg   close(handle.fd);
1467ec681f3Smrg}
1477ec681f3Smrg
1487ec681f3Smrgstatic void
1497ec681f3Smrgpanvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences, unsigned *nr_in_fences)
1507ec681f3Smrg{
1517ec681f3Smrg   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
1527ec681f3Smrg      switch (op->type) {
1537ec681f3Smrg      case PANVK_EVENT_OP_SET:
1547ec681f3Smrg         /* Nothing to do yet */
1557ec681f3Smrg         break;
1567ec681f3Smrg      case PANVK_EVENT_OP_RESET:
1577ec681f3Smrg         /* Nothing to do yet */
1587ec681f3Smrg         break;
1597ec681f3Smrg      case PANVK_EVENT_OP_WAIT:
1607ec681f3Smrg         in_fences[(*nr_in_fences)++] = op->event->syncobj;
1617ec681f3Smrg         break;
1627ec681f3Smrg      default:
1637ec681f3Smrg         unreachable("bad panvk_event_op type\n");
1647ec681f3Smrg      }
1657ec681f3Smrg   }
1667ec681f3Smrg}
1677ec681f3Smrg
1687ec681f3Smrgstatic void
1697ec681f3Smrgpanvk_signal_event_syncobjs(struct panvk_queue *queue, struct panvk_batch *batch)
1707ec681f3Smrg{
1717ec681f3Smrg   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
1727ec681f3Smrg
1737ec681f3Smrg   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
1747ec681f3Smrg      switch (op->type) {
1757ec681f3Smrg      case PANVK_EVENT_OP_SET: {
1767ec681f3Smrg         panvk_queue_transfer_sync(queue, op->event->syncobj);
1777ec681f3Smrg         break;
1787ec681f3Smrg      }
1797ec681f3Smrg      case PANVK_EVENT_OP_RESET: {
1807ec681f3Smrg         struct panvk_event *event = op->event;
1817ec681f3Smrg
1827ec681f3Smrg         struct drm_syncobj_array objs = {
1837ec681f3Smrg            .handles = (uint64_t) (uintptr_t) &event->syncobj,
1847ec681f3Smrg            .count_handles = 1
1857ec681f3Smrg         };
1867ec681f3Smrg
1877ec681f3Smrg         int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
1887ec681f3Smrg         assert(!ret);
1897ec681f3Smrg         break;
1907ec681f3Smrg      }
1917ec681f3Smrg      case PANVK_EVENT_OP_WAIT:
1927ec681f3Smrg         /* Nothing left to do */
1937ec681f3Smrg         break;
1947ec681f3Smrg      default:
1957ec681f3Smrg         unreachable("bad panvk_event_op type\n");
1967ec681f3Smrg      }
1977ec681f3Smrg   }
1987ec681f3Smrg}
1997ec681f3Smrg
2007ec681f3SmrgVkResult
2017ec681f3Smrgpanvk_per_arch(QueueSubmit)(VkQueue _queue,
2027ec681f3Smrg                            uint32_t submitCount,
2037ec681f3Smrg                            const VkSubmitInfo *pSubmits,
2047ec681f3Smrg                            VkFence _fence)
2057ec681f3Smrg{
2067ec681f3Smrg   VK_FROM_HANDLE(panvk_queue, queue, _queue);
2077ec681f3Smrg   VK_FROM_HANDLE(panvk_fence, fence, _fence);
2087ec681f3Smrg   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
2097ec681f3Smrg
2107ec681f3Smrg   for (uint32_t i = 0; i < submitCount; ++i) {
2117ec681f3Smrg      const VkSubmitInfo *submit = pSubmits + i;
2127ec681f3Smrg      unsigned nr_semaphores = submit->waitSemaphoreCount + 1;
2137ec681f3Smrg      uint32_t semaphores[nr_semaphores];
2147ec681f3Smrg
2157ec681f3Smrg      semaphores[0] = queue->sync;
2167ec681f3Smrg      for (unsigned i = 0; i < submit->waitSemaphoreCount; i++) {
2177ec681f3Smrg         VK_FROM_HANDLE(panvk_semaphore, sem, submit->pWaitSemaphores[i]);
2187ec681f3Smrg
2197ec681f3Smrg         semaphores[i + 1] = sem->syncobj.temporary ? : sem->syncobj.permanent;
2207ec681f3Smrg      }
2217ec681f3Smrg
2227ec681f3Smrg      for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
2237ec681f3Smrg         VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, (submit->pCommandBuffers[j]));
2247ec681f3Smrg
2257ec681f3Smrg         list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) {
2267ec681f3Smrg            /* FIXME: should be done at the batch level */
2277ec681f3Smrg            unsigned nr_bos =
2287ec681f3Smrg               panvk_pool_num_bos(&cmdbuf->desc_pool) +
2297ec681f3Smrg               panvk_pool_num_bos(&cmdbuf->varying_pool) +
2307ec681f3Smrg               panvk_pool_num_bos(&cmdbuf->tls_pool) +
2317ec681f3Smrg               (batch->fb.info ? batch->fb.info->attachment_count : 0) +
2327ec681f3Smrg               (batch->blit.src ? 1 : 0) +
2337ec681f3Smrg               (batch->blit.dst ? 1 : 0) +
2347ec681f3Smrg               (batch->scoreboard.first_tiler ? 1 : 0) + 1;
2357ec681f3Smrg            unsigned bo_idx = 0;
2367ec681f3Smrg            uint32_t bos[nr_bos];
2377ec681f3Smrg
2387ec681f3Smrg            panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]);
2397ec681f3Smrg            bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool);
2407ec681f3Smrg
2417ec681f3Smrg            panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]);
2427ec681f3Smrg            bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool);
2437ec681f3Smrg
2447ec681f3Smrg            panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]);
2457ec681f3Smrg            bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool);
2467ec681f3Smrg
2477ec681f3Smrg            if (batch->fb.info) {
2487ec681f3Smrg               for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) {
2497ec681f3Smrg                  bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle;
2507ec681f3Smrg               }
2517ec681f3Smrg            }
2527ec681f3Smrg
2537ec681f3Smrg            if (batch->blit.src)
2547ec681f3Smrg               bos[bo_idx++] = batch->blit.src->gem_handle;
2557ec681f3Smrg
2567ec681f3Smrg            if (batch->blit.dst)
2577ec681f3Smrg               bos[bo_idx++] = batch->blit.dst->gem_handle;
2587ec681f3Smrg
2597ec681f3Smrg            if (batch->scoreboard.first_tiler)
2607ec681f3Smrg               bos[bo_idx++] = pdev->tiler_heap->gem_handle;
2617ec681f3Smrg
2627ec681f3Smrg            bos[bo_idx++] = pdev->sample_positions->gem_handle;
2637ec681f3Smrg            assert(bo_idx == nr_bos);
2647ec681f3Smrg
2657ec681f3Smrg            /* Merge identical BO entries. */
2667ec681f3Smrg            for (unsigned x = 0; x < nr_bos; x++) {
2677ec681f3Smrg               for (unsigned y = x + 1; y < nr_bos; ) {
2687ec681f3Smrg                  if (bos[x] == bos[y])
2697ec681f3Smrg                     bos[y] = bos[--nr_bos];
2707ec681f3Smrg                  else
2717ec681f3Smrg                     y++;
2727ec681f3Smrg               }
2737ec681f3Smrg            }
2747ec681f3Smrg
2757ec681f3Smrg            unsigned nr_in_fences = 0;
2767ec681f3Smrg            unsigned max_wait_event_syncobjs =
2777ec681f3Smrg               util_dynarray_num_elements(&batch->event_ops,
2787ec681f3Smrg                                          struct panvk_event_op);
2797ec681f3Smrg            uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs];
2807ec681f3Smrg            memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences));
2817ec681f3Smrg            nr_in_fences += nr_semaphores;
2827ec681f3Smrg
2837ec681f3Smrg            panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences);
2847ec681f3Smrg
2857ec681f3Smrg            panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences);
2867ec681f3Smrg
2877ec681f3Smrg            panvk_signal_event_syncobjs(queue, batch);
2887ec681f3Smrg         }
2897ec681f3Smrg      }
2907ec681f3Smrg
2917ec681f3Smrg      /* Transfer the out fence to signal semaphores */
2927ec681f3Smrg      for (unsigned i = 0; i < submit->signalSemaphoreCount; i++) {
2937ec681f3Smrg         VK_FROM_HANDLE(panvk_semaphore, sem, submit->pSignalSemaphores[i]);
2947ec681f3Smrg         panvk_queue_transfer_sync(queue, sem->syncobj.temporary ? : sem->syncobj.permanent);
2957ec681f3Smrg      }
2967ec681f3Smrg   }
2977ec681f3Smrg
2987ec681f3Smrg   if (fence) {
2997ec681f3Smrg      /* Transfer the last out fence to the fence object */
3007ec681f3Smrg      panvk_queue_transfer_sync(queue, fence->syncobj.temporary ? : fence->syncobj.permanent);
3017ec681f3Smrg   }
3027ec681f3Smrg
3037ec681f3Smrg   return VK_SUCCESS;
3047ec681f3Smrg}
3057ec681f3Smrg
3067ec681f3SmrgVkResult
3077ec681f3Smrgpanvk_per_arch(CreateSampler)(VkDevice _device,
3087ec681f3Smrg                              const VkSamplerCreateInfo *pCreateInfo,
3097ec681f3Smrg                              const VkAllocationCallbacks *pAllocator,
3107ec681f3Smrg                              VkSampler *pSampler)
3117ec681f3Smrg{
3127ec681f3Smrg   VK_FROM_HANDLE(panvk_device, device, _device);
3137ec681f3Smrg   struct panvk_sampler *sampler;
3147ec681f3Smrg
3157ec681f3Smrg   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3167ec681f3Smrg
3177ec681f3Smrg   sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
3187ec681f3Smrg                             VK_OBJECT_TYPE_SAMPLER);
3197ec681f3Smrg   if (!sampler)
3207ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3217ec681f3Smrg
3227ec681f3Smrg   STATIC_ASSERT(sizeof(sampler->desc) >= pan_size(SAMPLER));
3237ec681f3Smrg   panvk_per_arch(emit_sampler)(pCreateInfo, &sampler->desc);
3247ec681f3Smrg   *pSampler = panvk_sampler_to_handle(sampler);
3257ec681f3Smrg
3267ec681f3Smrg   return VK_SUCCESS;
3277ec681f3Smrg}
328