17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2019 Raspberry Pi
37ec681f3Smrg *
47ec681f3Smrg * Based in part on v3d driver which is:
57ec681f3Smrg *
67ec681f3Smrg * Copyright © 2014-2017 Broadcom
77ec681f3Smrg *
87ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
97ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
107ec681f3Smrg * to deal in the Software without restriction, including without limitation
117ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
127ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
137ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
147ec681f3Smrg *
157ec681f3Smrg * The above copyright notice and this permission notice (including the next
167ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
177ec681f3Smrg * Software.
187ec681f3Smrg *
197ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
207ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
217ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
227ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
237ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
247ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
257ec681f3Smrg * IN THE SOFTWARE.
267ec681f3Smrg */
277ec681f3Smrg
287ec681f3Smrg#include "v3dv_private.h"
297ec681f3Smrg#include "vk_format_info.h"
307ec681f3Smrg
317ec681f3Smrg/* The only version specific structure that we need is
327ec681f3Smrg * TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from
337ec681f3Smrg * previous V3D versions and we don't expect that to change, so for now let's
347ec681f3Smrg * just hardcode the V3D version here.
357ec681f3Smrg */
367ec681f3Smrg#define V3D_VERSION 41
377ec681f3Smrg#include "broadcom/common/v3d_macros.h"
387ec681f3Smrg#include "broadcom/cle/v3dx_pack.h"
397ec681f3Smrg
/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3

/* Upper bound on distinct texture BOs a single uniform stream can
 * reference: the per-stage texture/sampler limit times the number of
 * supported stages.
 */
#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)

/* Sparse scratch list of texture BOs referenced while emitting a uniform
 * stream, indexed by texture index (NULL means unused). Collected here so
 * each BO is added to the job exactly once after the stream is written.
 */
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};
507ec681f3Smrg
/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)

/* Dense scratch list of texture/sampler shader state BOs referenced while
 * emitting a uniform stream. Entries are deduplicated on insertion (see
 * state_bo_in_list()) because texture and sampler states are typically
 * suballocated from the same BO.
 */
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};
597ec681f3Smrg
/* UBO slot 0 is reserved for the push constants UBO, hence the extra +1 */
#define MAX_TOTAL_UNIFORM_BUFFERS (1 + MAX_UNIFORM_BUFFERS * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)

/* Sparse scratch lists of UBO/SSBO BOs referenced while emitting a uniform
 * stream (NULL means unused). ubo[0] holds the push constants BO; regular
 * UBOs start at index 1.
 */
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};
667ec681f3Smrg
677ec681f3Smrgstatic bool
687ec681f3Smrgstate_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
697ec681f3Smrg{
707ec681f3Smrg   for (int i = 0; i < list->count; i++) {
717ec681f3Smrg      if (list->states[i] == bo)
727ec681f3Smrg         return true;
737ec681f3Smrg   }
747ec681f3Smrg   return false;
757ec681f3Smrg}
767ec681f3Smrg
777ec681f3Smrg/*
787ec681f3Smrg * This method checks if the ubo used for push constants is needed to be
797ec681f3Smrg * updated or not.
807ec681f3Smrg *
817ec681f3Smrg * push contants ubo is only used for push constants accessed by a non-const
827ec681f3Smrg * index.
837ec681f3Smrg *
847ec681f3Smrg * FIXME: right now for this cases we are uploading the full
857ec681f3Smrg * push_constants_data. An improvement would be to upload only the data that
867ec681f3Smrg * we need to rely on a UBO.
877ec681f3Smrg */
887ec681f3Smrgstatic void
897ec681f3Smrgcheck_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
907ec681f3Smrg                         struct v3dv_pipeline *pipeline)
917ec681f3Smrg{
927ec681f3Smrg   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS) ||
937ec681f3Smrg       pipeline->layout->push_constant_size == 0)
947ec681f3Smrg      return;
957ec681f3Smrg
967ec681f3Smrg   if (cmd_buffer->push_constants_resource.bo == NULL) {
977ec681f3Smrg      cmd_buffer->push_constants_resource.bo =
987ec681f3Smrg         v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE,
997ec681f3Smrg                       "push constants", true);
1007ec681f3Smrg
1017ec681f3Smrg      if (!cmd_buffer->push_constants_resource.bo) {
1027ec681f3Smrg         fprintf(stderr, "Failed to allocate memory for push constants\n");
1037ec681f3Smrg         abort();
1047ec681f3Smrg      }
1057ec681f3Smrg
1067ec681f3Smrg      bool ok = v3dv_bo_map(cmd_buffer->device,
1077ec681f3Smrg                            cmd_buffer->push_constants_resource.bo,
1087ec681f3Smrg                            MAX_PUSH_CONSTANTS_SIZE);
1097ec681f3Smrg      if (!ok) {
1107ec681f3Smrg         fprintf(stderr, "failed to map push constants buffer\n");
1117ec681f3Smrg         abort();
1127ec681f3Smrg      }
1137ec681f3Smrg   } else {
1147ec681f3Smrg      if (cmd_buffer->push_constants_resource.offset + MAX_PUSH_CONSTANTS_SIZE <=
1157ec681f3Smrg          cmd_buffer->push_constants_resource.bo->size) {
1167ec681f3Smrg         cmd_buffer->push_constants_resource.offset += MAX_PUSH_CONSTANTS_SIZE;
1177ec681f3Smrg      } else {
1187ec681f3Smrg         /* FIXME: we got out of space for push descriptors. Should we create
1197ec681f3Smrg          * a new bo? This could be easier with a uploader
1207ec681f3Smrg          */
1217ec681f3Smrg      }
1227ec681f3Smrg   }
1237ec681f3Smrg
1247ec681f3Smrg   memcpy(cmd_buffer->push_constants_resource.bo->map +
1257ec681f3Smrg          cmd_buffer->push_constants_resource.offset,
1267ec681f3Smrg          cmd_buffer->push_constants_data,
1277ec681f3Smrg          MAX_PUSH_CONSTANTS_SIZE);
1287ec681f3Smrg
1297ec681f3Smrg   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS;
1307ec681f3Smrg}
1317ec681f3Smrg
/** V3D 4.x TMU configuration parameter 0 (texture).
 *
 * Emits into the uniform stream the address of the texture shader state
 * record for the texture unit packed in @data, and records the texture BO
 * and the state BO in @tex_bos / @state_bos so the caller can add them to
 * the job once the whole stream has been written.
 */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
   /* data packs both the texture unit index and an offset */
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   /* Location of the texture shader state record for this texture */
   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   /* P0 = state record address plus the offset packed in data */
   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}
1757ec681f3Smrg
/** V3D 4.x TMU configuration parameter 1 (sampler).
 *
 * Emits into the uniform stream the address of the sampler state record
 * for the sampler unit packed in @data, patching the unnormalized
 * coordinates flag from the sampler object, and records the state BO in
 * @state_bos so the caller can add it to the job.
 */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* The "no sampler" placeholder indices must never reach this path */
   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object: the P1 bits
    * carried in data are unpacked, the flag is set, and the bits are
    * re-packed in place.
    */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
      V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
      p1_unpacked.unnormalized_coordinates = true;
      V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
                                        &p1_unpacked);
   }

   /* P1 = sampler state record address plus the (possibly patched) P1 bits */
   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}
2267ec681f3Smrg
/* Emits the address (or size) uniform for a UBO or SSBO access.
 *
 * @content selects UBO vs SSBO and address vs size query; @data carries
 * the unit index (and, for UBO addresses, a packed offset). Any buffer BO
 * referenced is recorded in @buffer_bos so the caller can add it to the
 * job after the whole stream is written.
 */
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* UBO queries use the ubo map, SSBO queries the ssbo map */
   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   /* Only UBO address uniforms carry a packed offset in data */
   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   uint32_t dynamic_offset = 0;

   /* For ubos, index is shifted, as 0 is reserved for push constants.
    */
   if (content == QUNIFORM_UBO_ADDR &&
       v3d_unit_data_get_unit(data) == 0) {
      /* This call ensures the push constants UBO is up to date; it is a
       * no-op when no update is needed.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
      /* ubo slot 0 is reserved for the push constants BO */
      buffer_bos->ubo[0] = resource->bo;
   } else {
      /* Undo the +1 shift for regular UBOs; SSBOs use data directly */
      uint32_t index =
         content == QUNIFORM_UBO_ADDR ?
         v3d_unit_data_get_unit(data) - 1 :
         data;

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);
      assert(descriptor);
      assert(descriptor->buffer);
      assert(descriptor->buffer->mem);
      assert(descriptor->buffer->mem->bo);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         /* Size queries emit the descriptor range, no BO reference needed */
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         cl_aligned_u32(uniforms, descriptor->buffer->mem->bo->offset +
                                  descriptor->buffer->mem_offset +
                                  descriptor->offset +
                                  offset + dynamic_offset);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index + 1 < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index + 1] = descriptor->buffer->mem->bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = descriptor->buffer->mem->bo;
         }
      }
   }
}
3037ec681f3Smrg
3047ec681f3Smrgstatic uint32_t
3057ec681f3Smrgget_texture_size_from_image_view(struct v3dv_image_view *image_view,
3067ec681f3Smrg                                 enum quniform_contents contents,
3077ec681f3Smrg                                 uint32_t data)
3087ec681f3Smrg{
3097ec681f3Smrg   switch(contents) {
3107ec681f3Smrg   case QUNIFORM_IMAGE_WIDTH:
3117ec681f3Smrg   case QUNIFORM_TEXTURE_WIDTH:
3127ec681f3Smrg      /* We don't u_minify the values, as we are using the image_view
3137ec681f3Smrg       * extents
3147ec681f3Smrg       */
3157ec681f3Smrg      return image_view->vk.extent.width;
3167ec681f3Smrg   case QUNIFORM_IMAGE_HEIGHT:
3177ec681f3Smrg   case QUNIFORM_TEXTURE_HEIGHT:
3187ec681f3Smrg      return image_view->vk.extent.height;
3197ec681f3Smrg   case QUNIFORM_IMAGE_DEPTH:
3207ec681f3Smrg   case QUNIFORM_TEXTURE_DEPTH:
3217ec681f3Smrg      return image_view->vk.extent.depth;
3227ec681f3Smrg   case QUNIFORM_IMAGE_ARRAY_SIZE:
3237ec681f3Smrg   case QUNIFORM_TEXTURE_ARRAY_SIZE:
3247ec681f3Smrg      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
3257ec681f3Smrg         return image_view->vk.layer_count;
3267ec681f3Smrg      } else {
3277ec681f3Smrg         assert(image_view->vk.layer_count % 6 == 0);
3287ec681f3Smrg         return image_view->vk.layer_count / 6;
3297ec681f3Smrg      }
3307ec681f3Smrg   case QUNIFORM_TEXTURE_LEVELS:
3317ec681f3Smrg      return image_view->vk.level_count;
3327ec681f3Smrg   case QUNIFORM_TEXTURE_SAMPLES:
3337ec681f3Smrg      assert(image_view->vk.image);
3347ec681f3Smrg      return image_view->vk.image->samples;
3357ec681f3Smrg   default:
3367ec681f3Smrg      unreachable("Bad texture size field");
3377ec681f3Smrg   }
3387ec681f3Smrg}
3397ec681f3Smrg
3407ec681f3Smrg
3417ec681f3Smrgstatic uint32_t
3427ec681f3Smrgget_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
3437ec681f3Smrg                                  enum quniform_contents contents,
3447ec681f3Smrg                                  uint32_t data)
3457ec681f3Smrg{
3467ec681f3Smrg   switch(contents) {
3477ec681f3Smrg   case QUNIFORM_IMAGE_WIDTH:
3487ec681f3Smrg   case QUNIFORM_TEXTURE_WIDTH:
3497ec681f3Smrg      return buffer_view->num_elements;
3507ec681f3Smrg   /* Only size can be queried for texel buffers  */
3517ec681f3Smrg   default:
3527ec681f3Smrg      unreachable("Bad texture size field for texel buffers");
3537ec681f3Smrg   }
3547ec681f3Smrg}
3557ec681f3Smrg
3567ec681f3Smrgstatic uint32_t
3577ec681f3Smrgget_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
3587ec681f3Smrg                 struct v3dv_pipeline *pipeline,
3597ec681f3Smrg                 enum broadcom_shader_stage stage,
3607ec681f3Smrg                 enum quniform_contents contents,
3617ec681f3Smrg                 uint32_t data)
3627ec681f3Smrg{
3637ec681f3Smrg   uint32_t texture_idx = data;
3647ec681f3Smrg
3657ec681f3Smrg   struct v3dv_descriptor_state *descriptor_state =
3667ec681f3Smrg      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
3677ec681f3Smrg
3687ec681f3Smrg   struct v3dv_descriptor *descriptor =
3697ec681f3Smrg      v3dv_descriptor_map_get_descriptor(descriptor_state,
3707ec681f3Smrg                                         &pipeline->shared_data->maps[stage]->texture_map,
3717ec681f3Smrg                                         pipeline->layout,
3727ec681f3Smrg                                         texture_idx, NULL);
3737ec681f3Smrg
3747ec681f3Smrg   assert(descriptor);
3757ec681f3Smrg
3767ec681f3Smrg   switch (descriptor->type) {
3777ec681f3Smrg   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
3787ec681f3Smrg   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
3797ec681f3Smrg   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
3807ec681f3Smrg   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
3817ec681f3Smrg      return get_texture_size_from_image_view(descriptor->image_view,
3827ec681f3Smrg                                              contents, data);
3837ec681f3Smrg   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
3847ec681f3Smrg   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
3857ec681f3Smrg      return get_texture_size_from_buffer_view(descriptor->buffer_view,
3867ec681f3Smrg                                               contents, data);
3877ec681f3Smrg   default:
3887ec681f3Smrg      unreachable("Wrong descriptor for getting texture size");
3897ec681f3Smrg   }
3907ec681f3Smrg}
3917ec681f3Smrg
/* Writes the uniform stream for @variant into the job's indirect BO and
 * returns its address.
 *
 * Every BO the uniforms reference (textures, shader state, UBOs/SSBOs,
 * CSD shared memory, spill memory) is added to the job before returning.
 *
 * If @wg_count_offsets is non-NULL, it receives, for each workgroup-count
 * uniform emitted, a pointer into the stream where that count was written,
 * so the caller can patch the value later (CSD dispatch).
 */
struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);

   /* Scratch lists of BOs referenced while emitting the stream; they are
    * flushed into the job once, after the stream is complete.
    */
   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      /* data is the dword index into the push constant data */
      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->push_constants_data[data]);
         break;

      /* X/Y viewport scales are emitted multiplied by 256 */
      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);

        break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to be within the number of layers of the framebuffer so we prevent the
       * binner from trying to access tile state memory out of bounds (for
       * layers that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patch it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point, but again, since this
       * is only for sanityzing the shader and it only affects the specific case
       * of secondary command buffers without framebuffer info available it
       * might not be worth the trouble.
       *
       * With multiview the number of layers is dictated by the view mask
       * and not by the framebuffer layers. We do set the job's frame tiling
       * information correctly from the view mask in that case, however,
       * secondary command buffers may not have valid frame tiling data,
       * so when multiview is enabled, we always set the number of layers
       * from the subpass view mask.
       */
      case QUNIFORM_FB_LAYERS: {
         const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
         const uint32_t view_mask =
            state->pass->subpasses[state->subpass_idx].view_mask;

         uint32_t num_layers;
         if (view_mask != 0) {
            num_layers = util_last_bit(view_mask);
         } else if (job->frame_tiling.layers != 0) {
            num_layers = job->frame_tiling.layers;
         } else if (cmd_buffer->state.framebuffer) {
            num_layers = cmd_buffer->state.framebuffer->layers;
         } else {
            assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            num_layers = 2048;
#if DEBUG
            fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
                            "secondary command buffer\n");
#endif
         }
         cl_aligned_u32(&uniforms, num_layers);
         break;
      }

      case QUNIFORM_VIEW_INDEX:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
         break;

      /* data selects the workgroup dimension (x/y/z). We record where the
       * count was written so the caller can patch it later if needed.
       */
      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_WORK_GROUP_BASE:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   /* Flush all referenced BOs into the job (each one exactly once) */
   for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
      if (tex_bos.tex[i])
         v3dv_job_add_bo(job, tex_bos.tex[i]);
   }

   for (int i = 0; i < state_bos.count; i++)
      v3dv_job_add_bo(job, state_bos.states[i]);

   for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
      if (buffer_bos.ubo[i])
         v3dv_job_add_bo(job, buffer_bos.ubo[i]);
   }

   for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
      if (buffer_bos.ssbo[i])
         v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
   }

   /* NOTE(review): this reads job->csd regardless of job->type — assumes
    * csd.shared_memory is NULL for non-CSD jobs; confirm against the
    * v3dv_job definition.
    */
   if (job->csd.shared_memory)
      v3dv_job_add_bo(job, job->csd.shared_memory);

   if (pipeline->spill.bo)
      v3dv_job_add_bo(job, pipeline->spill.bo);

   return uniform_stream;
}
6077ec681f3Smrg
/* Writes the uniform stream for @variant and returns its address.
 *
 * Convenience wrapper around v3dv_write_uniforms_wg_offsets() for callers
 * that do not need the workgroup-count uniform offsets.
 */
struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_shader_variant *variant)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
}
615