17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2021 Raspberry Pi
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "v3dv_private.h"
257ec681f3Smrg#include "broadcom/common/v3d_macros.h"
267ec681f3Smrg#include "broadcom/cle/v3dx_pack.h"
277ec681f3Smrg#include "broadcom/compiler/v3d_compiler.h"
287ec681f3Smrg
297ec681f3Smrg#include "vk_format_info.h"
307ec681f3Smrg
317ec681f3Smrg/*
327ec681f3Smrg * This method translates pipe_swizzle to the swizzle values used at the
337ec681f3Smrg * packet TEXTURE_SHADER_STATE
347ec681f3Smrg *
357ec681f3Smrg * FIXME: C&P from v3d, common place?
367ec681f3Smrg */
377ec681f3Smrgstatic uint32_t
387ec681f3Smrgtranslate_swizzle(unsigned char pipe_swizzle)
397ec681f3Smrg{
407ec681f3Smrg   switch (pipe_swizzle) {
417ec681f3Smrg   case PIPE_SWIZZLE_0:
427ec681f3Smrg      return 0;
437ec681f3Smrg   case PIPE_SWIZZLE_1:
447ec681f3Smrg      return 1;
457ec681f3Smrg   case PIPE_SWIZZLE_X:
467ec681f3Smrg   case PIPE_SWIZZLE_Y:
477ec681f3Smrg   case PIPE_SWIZZLE_Z:
487ec681f3Smrg   case PIPE_SWIZZLE_W:
497ec681f3Smrg      return 2 + pipe_swizzle;
507ec681f3Smrg   default:
517ec681f3Smrg      unreachable("unknown swizzle");
527ec681f3Smrg   }
537ec681f3Smrg}
547ec681f3Smrg
557ec681f3Smrg/*
567ec681f3Smrg * Packs and ensure bo for the shader state (the latter can be temporal).
577ec681f3Smrg */
587ec681f3Smrgstatic void
597ec681f3Smrgpack_texture_shader_state_helper(struct v3dv_device *device,
607ec681f3Smrg                                 struct v3dv_image_view *image_view,
617ec681f3Smrg                                 bool for_cube_map_array_storage)
627ec681f3Smrg{
637ec681f3Smrg   assert(!for_cube_map_array_storage ||
647ec681f3Smrg          image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY);
657ec681f3Smrg   const uint32_t index = for_cube_map_array_storage ? 1 : 0;
667ec681f3Smrg
677ec681f3Smrg   assert(image_view->vk.image);
687ec681f3Smrg   const struct v3dv_image *image = (struct v3dv_image *) image_view->vk.image;
697ec681f3Smrg
707ec681f3Smrg   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT ||
717ec681f3Smrg          image->vk.samples == VK_SAMPLE_COUNT_4_BIT);
727ec681f3Smrg   const uint32_t msaa_scale = image->vk.samples == VK_SAMPLE_COUNT_1_BIT ? 1 : 2;
737ec681f3Smrg
747ec681f3Smrg   v3dvx_pack(image_view->texture_shader_state[index], TEXTURE_SHADER_STATE, tex) {
757ec681f3Smrg
767ec681f3Smrg      tex.level_0_is_strictly_uif =
777ec681f3Smrg         (image->slices[0].tiling == V3D_TILING_UIF_XOR ||
787ec681f3Smrg          image->slices[0].tiling == V3D_TILING_UIF_NO_XOR);
797ec681f3Smrg
807ec681f3Smrg      tex.level_0_xor_enable = (image->slices[0].tiling == V3D_TILING_UIF_XOR);
817ec681f3Smrg
827ec681f3Smrg      if (tex.level_0_is_strictly_uif)
837ec681f3Smrg         tex.level_0_ub_pad = image->slices[0].ub_pad;
847ec681f3Smrg
857ec681f3Smrg      /* FIXME: v3d never sets uif_xor_disable, but uses it on the following
867ec681f3Smrg       * check so let's set the default value
877ec681f3Smrg       */
887ec681f3Smrg      tex.uif_xor_disable = false;
897ec681f3Smrg      if (tex.uif_xor_disable ||
907ec681f3Smrg          tex.level_0_is_strictly_uif) {
917ec681f3Smrg         tex.extended = true;
927ec681f3Smrg      }
937ec681f3Smrg
947ec681f3Smrg      tex.base_level = image_view->vk.base_mip_level;
957ec681f3Smrg      tex.max_level = image_view->vk.base_mip_level +
967ec681f3Smrg                      image_view->vk.level_count - 1;
977ec681f3Smrg
987ec681f3Smrg      tex.swizzle_r = translate_swizzle(image_view->swizzle[0]);
997ec681f3Smrg      tex.swizzle_g = translate_swizzle(image_view->swizzle[1]);
1007ec681f3Smrg      tex.swizzle_b = translate_swizzle(image_view->swizzle[2]);
1017ec681f3Smrg      tex.swizzle_a = translate_swizzle(image_view->swizzle[3]);
1027ec681f3Smrg
1037ec681f3Smrg      tex.texture_type = image_view->format->tex_type;
1047ec681f3Smrg
1057ec681f3Smrg      if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
1067ec681f3Smrg         tex.image_depth = image->vk.extent.depth;
1077ec681f3Smrg      } else {
1087ec681f3Smrg         tex.image_depth = image_view->vk.layer_count;
1097ec681f3Smrg      }
1107ec681f3Smrg
1117ec681f3Smrg      /* Empirical testing with CTS shows that when we are sampling from cube
1127ec681f3Smrg       * arrays we want to set image depth to layers / 6, but not when doing
1137ec681f3Smrg       * image load/store.
1147ec681f3Smrg       */
1157ec681f3Smrg      if (image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY &&
1167ec681f3Smrg          !for_cube_map_array_storage) {
1177ec681f3Smrg         assert(tex.image_depth % 6 == 0);
1187ec681f3Smrg         tex.image_depth /= 6;
1197ec681f3Smrg      }
1207ec681f3Smrg
1217ec681f3Smrg      tex.image_height = image->vk.extent.height * msaa_scale;
1227ec681f3Smrg      tex.image_width = image->vk.extent.width * msaa_scale;
1237ec681f3Smrg
1247ec681f3Smrg      /* On 4.x, the height of a 1D texture is redefined to be the
1257ec681f3Smrg       * upper 14 bits of the width (which is only usable with txf).
1267ec681f3Smrg       */
1277ec681f3Smrg      if (image->vk.image_type == VK_IMAGE_TYPE_1D) {
1287ec681f3Smrg         tex.image_height = tex.image_width >> 14;
1297ec681f3Smrg      }
1307ec681f3Smrg      tex.image_width &= (1 << 14) - 1;
1317ec681f3Smrg      tex.image_height &= (1 << 14) - 1;
1327ec681f3Smrg
1337ec681f3Smrg      tex.array_stride_64_byte_aligned = image->cube_map_stride / 64;
1347ec681f3Smrg
1357ec681f3Smrg      tex.srgb = vk_format_is_srgb(image_view->vk.format);
1367ec681f3Smrg
1377ec681f3Smrg      /* At this point we don't have the job. That's the reason the first
1387ec681f3Smrg       * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
1397ec681f3Smrg       * add the bo to the job. This also means that we need to add manually
1407ec681f3Smrg       * the image bo to the job using the texture.
1417ec681f3Smrg       */
1427ec681f3Smrg      const uint32_t base_offset =
1437ec681f3Smrg         image->mem->bo->offset +
1447ec681f3Smrg         v3dv_layer_offset(image, 0, image_view->vk.base_array_layer);
1457ec681f3Smrg      tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
1467ec681f3Smrg   }
1477ec681f3Smrg}
1487ec681f3Smrg
1497ec681f3Smrgvoid
1507ec681f3Smrgv3dX(pack_texture_shader_state)(struct v3dv_device *device,
1517ec681f3Smrg                                struct v3dv_image_view *iview)
1527ec681f3Smrg{
1537ec681f3Smrg   pack_texture_shader_state_helper(device, iview, false);
1547ec681f3Smrg   if (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
1557ec681f3Smrg      pack_texture_shader_state_helper(device, iview, true);
1567ec681f3Smrg}
1577ec681f3Smrg
1587ec681f3Smrgvoid
1597ec681f3Smrgv3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
1607ec681f3Smrg                                                 struct v3dv_buffer_view *buffer_view)
1617ec681f3Smrg{
1627ec681f3Smrg   assert(buffer_view->buffer);
1637ec681f3Smrg   const struct v3dv_buffer *buffer = buffer_view->buffer;
1647ec681f3Smrg
1657ec681f3Smrg   v3dvx_pack(buffer_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
1667ec681f3Smrg      tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
1677ec681f3Smrg      tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
1687ec681f3Smrg      tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
1697ec681f3Smrg      tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
1707ec681f3Smrg
1717ec681f3Smrg      tex.image_depth = 1;
1727ec681f3Smrg
1737ec681f3Smrg      /* On 4.x, the height of a 1D texture is redefined to be the upper 14
1747ec681f3Smrg       * bits of the width (which is only usable with txf) (or in other words,
1757ec681f3Smrg       * we are providing a 28 bit field for size, but split on the usual
1767ec681f3Smrg       * 14bit height/width).
1777ec681f3Smrg       */
1787ec681f3Smrg      tex.image_width = buffer_view->num_elements;
1797ec681f3Smrg      tex.image_height = tex.image_width >> 14;
1807ec681f3Smrg      tex.image_width &= (1 << 14) - 1;
1817ec681f3Smrg      tex.image_height &= (1 << 14) - 1;
1827ec681f3Smrg
1837ec681f3Smrg      tex.texture_type = buffer_view->format->tex_type;
1847ec681f3Smrg      tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
1857ec681f3Smrg
1867ec681f3Smrg      /* At this point we don't have the job. That's the reason the first
1877ec681f3Smrg       * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
1887ec681f3Smrg       * add the bo to the job. This also means that we need to add manually
1897ec681f3Smrg       * the image bo to the job using the texture.
1907ec681f3Smrg       */
1917ec681f3Smrg      const uint32_t base_offset =
1927ec681f3Smrg         buffer->mem->bo->offset +
1937ec681f3Smrg         buffer->mem_offset +
1947ec681f3Smrg         buffer_view->offset;
1957ec681f3Smrg
1967ec681f3Smrg      tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
1977ec681f3Smrg   }
1987ec681f3Smrg}
199